//! AVX instruction emitter traits for the x86 assembler
//! (`asmkit/x86/features/AVX.rs`).
//!
//! Each instruction gets a generic `*Emitter` trait plus one impl per
//! supported operand combination, dispatching to the matching opcode.
use crate::core::emitter::*;
use crate::core::operand::*;
use crate::x86::assembler::*;
use crate::x86::operands::*;

use super::super::opcodes::*;
6
7/// A dummy operand that represents no register. Here just for simplicity.
8const NOREG: Operand = Operand::new();
9
10/// `VADDSUBPD` (VADDSUBPD). 
11/// Adds odd-numbered double precision floating-point values of the first source operand (second operand) with the corresponding double precision floating-point values from the second source operand (third operand); stores the result in the odd-numbered values of the destination operand (first operand). Subtracts the even-numbered double precision floating-point values from the second source operand from the corresponding double precision floating values in the first source operand; stores the result into the even-numbered values of the destination operand.
12///
13///
14/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSUBPD.html).
15///
16/// Supported operand variants:
17///
18/// ```text
19/// +---+---------------+
20/// | # | Operands      |
21/// +---+---------------+
22/// | 1 | Xmm, Xmm, Mem |
23/// | 2 | Xmm, Xmm, Xmm |
24/// | 3 | Ymm, Ymm, Mem |
25/// | 4 | Ymm, Ymm, Ymm |
26/// +---+---------------+
27/// ```
28pub trait VaddsubpdEmitter<A, B, C> {
29    fn vaddsubpd(&mut self, op0: A, op1: B, op2: C);
30}
31
32impl<'a> VaddsubpdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
33    fn vaddsubpd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
34        self.emit(VADDSUBPD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
35    }
36}
37
38impl<'a> VaddsubpdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
39    fn vaddsubpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
40        self.emit(VADDSUBPD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
41    }
42}
43
44impl<'a> VaddsubpdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
45    fn vaddsubpd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
46        self.emit(VADDSUBPD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
47    }
48}
49
50impl<'a> VaddsubpdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
51    fn vaddsubpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
52        self.emit(VADDSUBPD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
53    }
54}
55
56/// `VADDSUBPS` (VADDSUBPS). 
57/// Adds odd-numbered single precision floating-point values of the first source operand (second operand) with the corresponding single precision floating-point values from the second source operand (third operand); stores the result in the odd-numbered values of the destination operand (first operand). Subtracts the even-numbered single precision floating-point values from the second source operand from the corresponding single precision floating values in the first source operand; stores the result into the even-numbered values of the destination operand.
58///
59///
60/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSUBPS.html).
61///
62/// Supported operand variants:
63///
64/// ```text
65/// +---+---------------+
66/// | # | Operands      |
67/// +---+---------------+
68/// | 1 | Xmm, Xmm, Mem |
69/// | 2 | Xmm, Xmm, Xmm |
70/// | 3 | Ymm, Ymm, Mem |
71/// | 4 | Ymm, Ymm, Ymm |
72/// +---+---------------+
73/// ```
74pub trait VaddsubpsEmitter<A, B, C> {
75    fn vaddsubps(&mut self, op0: A, op1: B, op2: C);
76}
77
78impl<'a> VaddsubpsEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
79    fn vaddsubps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
80        self.emit(VADDSUBPS128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
81    }
82}
83
84impl<'a> VaddsubpsEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
85    fn vaddsubps(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
86        self.emit(VADDSUBPS128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
87    }
88}
89
90impl<'a> VaddsubpsEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
91    fn vaddsubps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
92        self.emit(VADDSUBPS256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
93    }
94}
95
96impl<'a> VaddsubpsEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
97    fn vaddsubps(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
98        self.emit(VADDSUBPS256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
99    }
100}
101
102/// `VBLENDPD` (VBLENDPD). 
103/// Double-precision floating-point values from the second source operand (third operand) are conditionally merged with values from the first source operand (second operand) and written to the destination operand (first operand). The immediate bits [3:0] determine whether the corresponding double precision floating-point value in the destination is copied from the second source or first source. If a bit in the mask, corresponding to a word, is ”1”, then the double precision floating-point value in the second source operand is copied, else the value in the first source operand is copied.
104///
105///
106/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDPD.html).
107///
108/// Supported operand variants:
109///
110/// ```text
111/// +---+--------------------+
112/// | # | Operands           |
113/// +---+--------------------+
114/// | 1 | Xmm, Xmm, Mem, Imm |
115/// | 2 | Xmm, Xmm, Xmm, Imm |
116/// | 3 | Ymm, Ymm, Mem, Imm |
117/// | 4 | Ymm, Ymm, Ymm, Imm |
118/// +---+--------------------+
119/// ```
120pub trait VblendpdEmitter<A, B, C, D> {
121    fn vblendpd(&mut self, op0: A, op1: B, op2: C, op3: D);
122}
123
124impl<'a> VblendpdEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
125    fn vblendpd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
126        self.emit(VBLENDPD128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
127    }
128}
129
130impl<'a> VblendpdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
131    fn vblendpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
132        self.emit(VBLENDPD128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
133    }
134}
135
136impl<'a> VblendpdEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
137    fn vblendpd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
138        self.emit(VBLENDPD256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
139    }
140}
141
142impl<'a> VblendpdEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
143    fn vblendpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
144        self.emit(VBLENDPD256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
145    }
146}
147
148/// `VBLENDPS` (VBLENDPS). 
149/// Packed single precision floating-point values from the second source operand (third operand) are conditionally merged with values from the first source operand (second operand) and written to the destination operand (first operand). The immediate bits [7:0] determine whether the corresponding single precision floating-point value in the destination is copied from the second source or first source. If a bit in the mask, corresponding to a word, is “1”, then the single precision floating-point value in the second source operand is copied, else the value in the first source operand is copied.
150///
151///
152/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDPS.html).
153///
154/// Supported operand variants:
155///
156/// ```text
157/// +---+--------------------+
158/// | # | Operands           |
159/// +---+--------------------+
160/// | 1 | Xmm, Xmm, Mem, Imm |
161/// | 2 | Xmm, Xmm, Xmm, Imm |
162/// | 3 | Ymm, Ymm, Mem, Imm |
163/// | 4 | Ymm, Ymm, Ymm, Imm |
164/// +---+--------------------+
165/// ```
166pub trait VblendpsEmitter<A, B, C, D> {
167    fn vblendps(&mut self, op0: A, op1: B, op2: C, op3: D);
168}
169
170impl<'a> VblendpsEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
171    fn vblendps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
172        self.emit(VBLENDPS128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
173    }
174}
175
176impl<'a> VblendpsEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
177    fn vblendps(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
178        self.emit(VBLENDPS128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
179    }
180}
181
182impl<'a> VblendpsEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
183    fn vblendps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
184        self.emit(VBLENDPS256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
185    }
186}
187
188impl<'a> VblendpsEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
189    fn vblendps(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
190        self.emit(VBLENDPS256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
191    }
192}
193
194/// `VBLENDVPD` (VBLENDVPD). 
195/// Conditionally copy each quadword data element of double precision floating-point value from the second source operand and the first source operand depending on mask bits defined in the mask register operand. The mask bits are the most significant bit in each quadword element of the mask register.
196///
197///
198/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDVPD.html).
199///
200/// Supported operand variants:
201///
202/// ```text
203/// +---+--------------------+
204/// | # | Operands           |
205/// +---+--------------------+
206/// | 1 | Xmm, Xmm, Mem, Xmm |
207/// | 2 | Xmm, Xmm, Xmm, Xmm |
208/// | 3 | Ymm, Ymm, Mem, Ymm |
209/// | 4 | Ymm, Ymm, Ymm, Ymm |
210/// +---+--------------------+
211/// ```
212pub trait VblendvpdEmitter<A, B, C, D> {
213    fn vblendvpd(&mut self, op0: A, op1: B, op2: C, op3: D);
214}
215
216impl<'a> VblendvpdEmitter<Xmm, Xmm, Xmm, Xmm> for Assembler<'a> {
217    fn vblendvpd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Xmm) {
218        self.emit(VBLENDVPD128RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
219    }
220}
221
222impl<'a> VblendvpdEmitter<Xmm, Xmm, Mem, Xmm> for Assembler<'a> {
223    fn vblendvpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Xmm) {
224        self.emit(VBLENDVPD128RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
225    }
226}
227
228impl<'a> VblendvpdEmitter<Ymm, Ymm, Ymm, Ymm> for Assembler<'a> {
229    fn vblendvpd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Ymm) {
230        self.emit(VBLENDVPD256RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
231    }
232}
233
234impl<'a> VblendvpdEmitter<Ymm, Ymm, Mem, Ymm> for Assembler<'a> {
235    fn vblendvpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Ymm) {
236        self.emit(VBLENDVPD256RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
237    }
238}
239
240/// `VBLENDVPS` (VBLENDVPS). 
241/// Conditionally copy each dword data element of single precision floating-point value from the second source operand and the first source operand depending on mask bits defined in the mask register operand. The mask bits are the most significant bit in each dword element of the mask register.
242///
243///
244/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDVPS.html).
245///
246/// Supported operand variants:
247///
248/// ```text
249/// +---+--------------------+
250/// | # | Operands           |
251/// +---+--------------------+
252/// | 1 | Xmm, Xmm, Mem, Xmm |
253/// | 2 | Xmm, Xmm, Xmm, Xmm |
254/// | 3 | Ymm, Ymm, Mem, Ymm |
255/// | 4 | Ymm, Ymm, Ymm, Ymm |
256/// +---+--------------------+
257/// ```
258pub trait VblendvpsEmitter<A, B, C, D> {
259    fn vblendvps(&mut self, op0: A, op1: B, op2: C, op3: D);
260}
261
262impl<'a> VblendvpsEmitter<Xmm, Xmm, Xmm, Xmm> for Assembler<'a> {
263    fn vblendvps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Xmm) {
264        self.emit(VBLENDVPS128RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
265    }
266}
267
268impl<'a> VblendvpsEmitter<Xmm, Xmm, Mem, Xmm> for Assembler<'a> {
269    fn vblendvps(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Xmm) {
270        self.emit(VBLENDVPS128RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
271    }
272}
273
274impl<'a> VblendvpsEmitter<Ymm, Ymm, Ymm, Ymm> for Assembler<'a> {
275    fn vblendvps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Ymm) {
276        self.emit(VBLENDVPS256RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
277    }
278}
279
280impl<'a> VblendvpsEmitter<Ymm, Ymm, Mem, Ymm> for Assembler<'a> {
281    fn vblendvps(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Ymm) {
282        self.emit(VBLENDVPS256RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
283    }
284}
285
286/// `VBROADCASTF128` (VBROADCASTF128). 
287/// VBROADCASTSD/VBROADCASTSS/VBROADCASTF128 load floating-point values as one tuple from the source operand (second operand) in memory and broadcast to all elements of the destination operand (first operand).
288///
289///
290/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VBROADCAST.html).
291///
292/// Supported operand variants:
293///
294/// ```text
295/// +---+----------+
296/// | # | Operands |
297/// +---+----------+
298/// | 1 | Ymm, Mem |
299/// | 2 | Ymm, Xmm |
300/// +---+----------+
301/// ```
302pub trait Vbroadcastf128Emitter<A, B> {
303    fn vbroadcastf128(&mut self, op0: A, op1: B);
304}
305
306impl<'a> Vbroadcastf128Emitter<Ymm, Xmm> for Assembler<'a> {
307    fn vbroadcastf128(&mut self, op0: Ymm, op1: Xmm) {
308        self.emit(VBROADCASTF128_256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
309    }
310}
311
312impl<'a> Vbroadcastf128Emitter<Ymm, Mem> for Assembler<'a> {
313    fn vbroadcastf128(&mut self, op0: Ymm, op1: Mem) {
314        self.emit(VBROADCASTF128_256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
315    }
316}
317
318/// `VCMPPD` (VCMPPD). 
319/// Performs a SIMD compare of the packed double precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.
320///
321///
322/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPD.html).
323///
324/// Supported operand variants:
325///
326/// ```text
327/// +----+---------------------+
328/// | #  | Operands            |
329/// +----+---------------------+
330/// | 1  | KReg, Xmm, Mem, Imm |
331/// | 2  | KReg, Xmm, Xmm, Imm |
332/// | 3  | KReg, Ymm, Mem, Imm |
333/// | 4  | KReg, Ymm, Ymm, Imm |
334/// | 5  | KReg, Zmm, Mem, Imm |
335/// | 6  | KReg, Zmm, Zmm, Imm |
336/// | 7  | Xmm, Xmm, Mem, Imm  |
337/// | 8  | Xmm, Xmm, Xmm, Imm  |
338/// | 9  | Ymm, Ymm, Mem, Imm  |
339/// | 10 | Ymm, Ymm, Ymm, Imm  |
340/// +----+---------------------+
341/// ```
342pub trait VcmppdEmitter<A, B, C, D> {
343    fn vcmppd(&mut self, op0: A, op1: B, op2: C, op3: D);
344}
345
346impl<'a> VcmppdEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
347    fn vcmppd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
348        self.emit(VCMPPD128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
349    }
350}
351
352impl<'a> VcmppdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
353    fn vcmppd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
354        self.emit(VCMPPD128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
355    }
356}
357
358impl<'a> VcmppdEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
359    fn vcmppd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
360        self.emit(VCMPPD256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
361    }
362}
363
364impl<'a> VcmppdEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
365    fn vcmppd(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
366        self.emit(VCMPPD256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
367    }
368}
369
370impl<'a> VcmppdEmitter<KReg, Xmm, Xmm, Imm> for Assembler<'a> {
371    fn vcmppd(&mut self, op0: KReg, op1: Xmm, op2: Xmm, op3: Imm) {
372        self.emit(VCMPPD128KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
373    }
374}
375
376impl<'a> VcmppdEmitter<KReg, Xmm, Mem, Imm> for Assembler<'a> {
377    fn vcmppd(&mut self, op0: KReg, op1: Xmm, op2: Mem, op3: Imm) {
378        self.emit(VCMPPD128KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
379    }
380}
381
382impl<'a> VcmppdEmitter<KReg, Ymm, Ymm, Imm> for Assembler<'a> {
383    fn vcmppd(&mut self, op0: KReg, op1: Ymm, op2: Ymm, op3: Imm) {
384        self.emit(VCMPPD256KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
385    }
386}
387
388impl<'a> VcmppdEmitter<KReg, Ymm, Mem, Imm> for Assembler<'a> {
389    fn vcmppd(&mut self, op0: KReg, op1: Ymm, op2: Mem, op3: Imm) {
390        self.emit(VCMPPD256KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
391    }
392}
393
394impl<'a> VcmppdEmitter<KReg, Zmm, Zmm, Imm> for Assembler<'a> {
395    fn vcmppd(&mut self, op0: KReg, op1: Zmm, op2: Zmm, op3: Imm) {
396        self.emit(VCMPPD512KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
397    }
398}
399
400impl<'a> VcmppdEmitter<KReg, Zmm, Mem, Imm> for Assembler<'a> {
401    fn vcmppd(&mut self, op0: KReg, op1: Zmm, op2: Mem, op3: Imm) {
402        self.emit(VCMPPD512KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
403    }
404}
405
406/// `VCMPPS` (VCMPPS). 
407/// Performs a SIMD compare of the packed single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
408///
409///
410/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
411///
412/// Supported operand variants:
413///
414/// ```text
415/// +----+---------------------+
416/// | #  | Operands            |
417/// +----+---------------------+
418/// | 1  | KReg, Xmm, Mem, Imm |
419/// | 2  | KReg, Xmm, Xmm, Imm |
420/// | 3  | KReg, Ymm, Mem, Imm |
421/// | 4  | KReg, Ymm, Ymm, Imm |
422/// | 5  | KReg, Zmm, Mem, Imm |
423/// | 6  | KReg, Zmm, Zmm, Imm |
424/// | 7  | Xmm, Xmm, Mem, Imm  |
425/// | 8  | Xmm, Xmm, Xmm, Imm  |
426/// | 9  | Ymm, Ymm, Mem, Imm  |
427/// | 10 | Ymm, Ymm, Ymm, Imm  |
428/// +----+---------------------+
429/// ```
430pub trait VcmppsEmitter<A, B, C, D> {
431    fn vcmpps(&mut self, op0: A, op1: B, op2: C, op3: D);
432}
433
434impl<'a> VcmppsEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
435    fn vcmpps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
436        self.emit(VCMPPS128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
437    }
438}
439
440impl<'a> VcmppsEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
441    fn vcmpps(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
442        self.emit(VCMPPS128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
443    }
444}
445
446impl<'a> VcmppsEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
447    fn vcmpps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
448        self.emit(VCMPPS256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
449    }
450}
451
452impl<'a> VcmppsEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
453    fn vcmpps(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
454        self.emit(VCMPPS256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
455    }
456}
457
458impl<'a> VcmppsEmitter<KReg, Xmm, Xmm, Imm> for Assembler<'a> {
459    fn vcmpps(&mut self, op0: KReg, op1: Xmm, op2: Xmm, op3: Imm) {
460        self.emit(VCMPPS128KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
461    }
462}
463
464impl<'a> VcmppsEmitter<KReg, Xmm, Mem, Imm> for Assembler<'a> {
465    fn vcmpps(&mut self, op0: KReg, op1: Xmm, op2: Mem, op3: Imm) {
466        self.emit(VCMPPS128KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
467    }
468}
469
470impl<'a> VcmppsEmitter<KReg, Ymm, Ymm, Imm> for Assembler<'a> {
471    fn vcmpps(&mut self, op0: KReg, op1: Ymm, op2: Ymm, op3: Imm) {
472        self.emit(VCMPPS256KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
473    }
474}
475
476impl<'a> VcmppsEmitter<KReg, Ymm, Mem, Imm> for Assembler<'a> {
477    fn vcmpps(&mut self, op0: KReg, op1: Ymm, op2: Mem, op3: Imm) {
478        self.emit(VCMPPS256KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
479    }
480}
481
482impl<'a> VcmppsEmitter<KReg, Zmm, Zmm, Imm> for Assembler<'a> {
483    fn vcmpps(&mut self, op0: KReg, op1: Zmm, op2: Zmm, op3: Imm) {
484        self.emit(VCMPPS512KRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
485    }
486}
487
488impl<'a> VcmppsEmitter<KReg, Zmm, Mem, Imm> for Assembler<'a> {
489    fn vcmpps(&mut self, op0: KReg, op1: Zmm, op2: Mem, op3: Imm) {
490        self.emit(VCMPPS512KRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
491    }
492}
493
494/// `VCMPSD`.
495///
496/// Supported operand variants:
497///
498/// ```text
499/// +---+---------------------+
500/// | # | Operands            |
501/// +---+---------------------+
502/// | 1 | KReg, Xmm, Mem, Imm |
503/// | 2 | KReg, Xmm, Xmm, Imm |
504/// | 3 | Xmm, Xmm, Mem, Imm  |
505/// | 4 | Xmm, Xmm, Xmm, Imm  |
506/// +---+---------------------+
507/// ```
508pub trait VcmpsdEmitter<A, B, C, D> {
509    fn vcmpsd(&mut self, op0: A, op1: B, op2: C, op3: D);
510}
511
512impl<'a> VcmpsdEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
513    fn vcmpsd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
514        self.emit(VCMPSDRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
515    }
516}
517
518impl<'a> VcmpsdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
519    fn vcmpsd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
520        self.emit(VCMPSDRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
521    }
522}
523
524impl<'a> VcmpsdEmitter<KReg, Xmm, Xmm, Imm> for Assembler<'a> {
525    fn vcmpsd(&mut self, op0: KReg, op1: Xmm, op2: Xmm, op3: Imm) {
526        self.emit(VCMPSDKRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
527    }
528}
529
530impl<'a> VcmpsdEmitter<KReg, Xmm, Mem, Imm> for Assembler<'a> {
531    fn vcmpsd(&mut self, op0: KReg, op1: Xmm, op2: Mem, op3: Imm) {
532        self.emit(VCMPSDKRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
533    }
534}
535
536/// `VCMPSS` (VCMPSS). 
537/// Compares the low single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
538///
539///
540/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
541///
542/// Supported operand variants:
543///
544/// ```text
545/// +---+---------------------+
546/// | # | Operands            |
547/// +---+---------------------+
548/// | 1 | KReg, Xmm, Mem, Imm |
549/// | 2 | KReg, Xmm, Xmm, Imm |
550/// | 3 | Xmm, Xmm, Mem, Imm  |
551/// | 4 | Xmm, Xmm, Xmm, Imm  |
552/// +---+---------------------+
553/// ```
554pub trait VcmpssEmitter<A, B, C, D> {
555    fn vcmpss(&mut self, op0: A, op1: B, op2: C, op3: D);
556}
557
558impl<'a> VcmpssEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
559    fn vcmpss(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
560        self.emit(VCMPSSRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
561    }
562}
563
564impl<'a> VcmpssEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
565    fn vcmpss(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
566        self.emit(VCMPSSRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
567    }
568}
569
570impl<'a> VcmpssEmitter<KReg, Xmm, Xmm, Imm> for Assembler<'a> {
571    fn vcmpss(&mut self, op0: KReg, op1: Xmm, op2: Xmm, op3: Imm) {
572        self.emit(VCMPSSKRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
573    }
574}
575
576impl<'a> VcmpssEmitter<KReg, Xmm, Mem, Imm> for Assembler<'a> {
577    fn vcmpss(&mut self, op0: KReg, op1: Xmm, op2: Mem, op3: Imm) {
578        self.emit(VCMPSSKRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
579    }
580}
581
582/// `VDPPD` (VDPPD). 
583/// Conditionally multiplies the packed double precision floating-point values in the destination operand (first operand) with the packed double precision floating-point values in the source (second operand) depending on a mask extracted from bits [5:4] of the immediate operand (third operand). If a condition mask bit is zero, the corresponding multiplication is replaced by a value of 0.0 in the manner described by Section 12.8.4 of Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
584///
585///
586/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DPPD.html).
587///
588/// Supported operand variants:
589///
590/// ```text
591/// +---+--------------------+
592/// | # | Operands           |
593/// +---+--------------------+
594/// | 1 | Xmm, Xmm, Mem, Imm |
595/// | 2 | Xmm, Xmm, Xmm, Imm |
596/// +---+--------------------+
597/// ```
598pub trait VdppdEmitter<A, B, C, D> {
599    fn vdppd(&mut self, op0: A, op1: B, op2: C, op3: D);
600}
601
602impl<'a> VdppdEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
603    fn vdppd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
604        self.emit(VDPPD128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
605    }
606}
607
608impl<'a> VdppdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
609    fn vdppd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
610        self.emit(VDPPD128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
611    }
612}
613
614/// `VDPPS` (VDPPS). 
615/// Conditionally multiplies the packed single precision floating-point values in the destination operand (first operand) with the packed single precision floats in the source (second operand) depending on a mask extracted from the high 4 bits of the immediate byte (third operand). If a condition mask bit in imm8[7:4] is zero, the corresponding multiplication is replaced by a value of 0.0 in the manner described by Section 12.8.4 of Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
616///
617///
618/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DPPS.html).
619///
620/// Supported operand variants:
621///
622/// ```text
623/// +---+--------------------+
624/// | # | Operands           |
625/// +---+--------------------+
626/// | 1 | Xmm, Xmm, Mem, Imm |
627/// | 2 | Xmm, Xmm, Xmm, Imm |
628/// | 3 | Ymm, Ymm, Mem, Imm |
629/// | 4 | Ymm, Ymm, Ymm, Imm |
630/// +---+--------------------+
631/// ```
632pub trait VdppsEmitter<A, B, C, D> {
633    fn vdpps(&mut self, op0: A, op1: B, op2: C, op3: D);
634}
635
636impl<'a> VdppsEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
637    fn vdpps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
638        self.emit(VDPPS128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
639    }
640}
641
642impl<'a> VdppsEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
643    fn vdpps(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
644        self.emit(VDPPS128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
645    }
646}
647
648impl<'a> VdppsEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
649    fn vdpps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
650        self.emit(VDPPS256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
651    }
652}
653
654impl<'a> VdppsEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
655    fn vdpps(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
656        self.emit(VDPPS256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
657    }
658}
659
660/// `VEXTRACTF128` (VEXTRACTF128). 
661/// VEXTRACTF128/VEXTRACTF32x4 and VEXTRACTF64x2 extract 128-bits of single precision floating-point values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at an 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or an 128-bit memory location.
662///
663///
664/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTF128%3AVEXTRACTF32x4%3AVEXTRACTF64x2%3AVEXTRACTF32x8%3AVEXTRACTF64x4.html).
665///
666/// Supported operand variants:
667///
668/// ```text
669/// +---+---------------+
670/// | # | Operands      |
671/// +---+---------------+
672/// | 1 | Mem, Ymm, Imm |
673/// | 2 | Xmm, Ymm, Imm |
674/// +---+---------------+
675/// ```
676pub trait Vextractf128Emitter<A, B, C> {
677    fn vextractf128(&mut self, op0: A, op1: B, op2: C);
678}
679
680impl<'a> Vextractf128Emitter<Xmm, Ymm, Imm> for Assembler<'a> {
681    fn vextractf128(&mut self, op0: Xmm, op1: Ymm, op2: Imm) {
682        self.emit(VEXTRACTF128RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
683    }
684}
685
686impl<'a> Vextractf128Emitter<Mem, Ymm, Imm> for Assembler<'a> {
687    fn vextractf128(&mut self, op0: Mem, op1: Ymm, op2: Imm) {
688        self.emit(VEXTRACTF128MRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
689    }
690}
691
692/// `VHADDPD` (VHADDPD). 
693/// Adds the double precision floating-point values in the high and low quadwords of the destination operand and stores the result in the low quadword of the destination operand.
694///
695///
696/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HADDPD.html).
697///
698/// Supported operand variants:
699///
700/// ```text
701/// +---+---------------+
702/// | # | Operands      |
703/// +---+---------------+
704/// | 1 | Xmm, Xmm, Mem |
705/// | 2 | Xmm, Xmm, Xmm |
706/// | 3 | Ymm, Ymm, Mem |
707/// | 4 | Ymm, Ymm, Ymm |
708/// +---+---------------+
709/// ```
710pub trait VhaddpdEmitter<A, B, C> {
711    fn vhaddpd(&mut self, op0: A, op1: B, op2: C);
712}
713
714impl<'a> VhaddpdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
715    fn vhaddpd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
716        self.emit(VHADDPD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
717    }
718}
719
720impl<'a> VhaddpdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
721    fn vhaddpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
722        self.emit(VHADDPD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
723    }
724}
725
726impl<'a> VhaddpdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
727    fn vhaddpd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
728        self.emit(VHADDPD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
729    }
730}
731
732impl<'a> VhaddpdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
733    fn vhaddpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
734        self.emit(VHADDPD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
735    }
736}
737
738/// `VHADDPS` (VHADDPS). 
739/// Adds the single precision floating-point values in the first and second dwords of the destination operand and stores the result in the first dword of the destination operand.
740///
741///
742/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HADDPS.html).
743///
744/// Supported operand variants:
745///
746/// ```text
747/// +---+---------------+
748/// | # | Operands      |
749/// +---+---------------+
750/// | 1 | Xmm, Xmm, Mem |
751/// | 2 | Xmm, Xmm, Xmm |
752/// | 3 | Ymm, Ymm, Mem |
753/// | 4 | Ymm, Ymm, Ymm |
754/// +---+---------------+
755/// ```
756pub trait VhaddpsEmitter<A, B, C> {
757    fn vhaddps(&mut self, op0: A, op1: B, op2: C);
758}
759
760impl<'a> VhaddpsEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
761    fn vhaddps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
762        self.emit(VHADDPS128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
763    }
764}
765
766impl<'a> VhaddpsEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
767    fn vhaddps(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
768        self.emit(VHADDPS128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
769    }
770}
771
772impl<'a> VhaddpsEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
773    fn vhaddps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
774        self.emit(VHADDPS256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
775    }
776}
777
778impl<'a> VhaddpsEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
779    fn vhaddps(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
780        self.emit(VHADDPS256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
781    }
782}
783
784/// `VHSUBPD` (VHSUBPD). 
785/// The HSUBPD instruction subtracts horizontally the packed double precision floating-point numbers of both operands.
786///
787///
788/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HSUBPD.html).
789///
790/// Supported operand variants:
791///
792/// ```text
793/// +---+---------------+
794/// | # | Operands      |
795/// +---+---------------+
796/// | 1 | Xmm, Xmm, Mem |
797/// | 2 | Xmm, Xmm, Xmm |
798/// | 3 | Ymm, Ymm, Mem |
799/// | 4 | Ymm, Ymm, Ymm |
800/// +---+---------------+
801/// ```
802pub trait VhsubpdEmitter<A, B, C> {
803    fn vhsubpd(&mut self, op0: A, op1: B, op2: C);
804}
805
806impl<'a> VhsubpdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
807    fn vhsubpd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
808        self.emit(VHSUBPD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
809    }
810}
811
812impl<'a> VhsubpdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
813    fn vhsubpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
814        self.emit(VHSUBPD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
815    }
816}
817
818impl<'a> VhsubpdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
819    fn vhsubpd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
820        self.emit(VHSUBPD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
821    }
822}
823
824impl<'a> VhsubpdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
825    fn vhsubpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
826        self.emit(VHSUBPD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
827    }
828}
829
830/// `VHSUBPS` (VHSUBPS). 
831/// Subtracts the single precision floating-point value in the second dword of the destination operand from the first dword of the destination operand and stores the result in the first dword of the destination operand.
832///
833///
834/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HSUBPS.html).
835///
836/// Supported operand variants:
837///
838/// ```text
839/// +---+---------------+
840/// | # | Operands      |
841/// +---+---------------+
842/// | 1 | Xmm, Xmm, Mem |
843/// | 2 | Xmm, Xmm, Xmm |
844/// | 3 | Ymm, Ymm, Mem |
845/// | 4 | Ymm, Ymm, Ymm |
846/// +---+---------------+
847/// ```
848pub trait VhsubpsEmitter<A, B, C> {
849    fn vhsubps(&mut self, op0: A, op1: B, op2: C);
850}
851
852impl<'a> VhsubpsEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
853    fn vhsubps(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
854        self.emit(VHSUBPS128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
855    }
856}
857
858impl<'a> VhsubpsEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
859    fn vhsubps(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
860        self.emit(VHSUBPS128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
861    }
862}
863
864impl<'a> VhsubpsEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
865    fn vhsubps(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
866        self.emit(VHSUBPS256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
867    }
868}
869
870impl<'a> VhsubpsEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
871    fn vhsubps(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
872        self.emit(VHSUBPS256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
873    }
874}
875
876/// `VINSERTF128` (VINSERTF128). 
877/// VINSERTF128/VINSERTF32x4 and VINSERTF64x2 insert 128-bits of packed floating-point values from the second source operand (the third operand) into the destination operand (the first operand) at an 128-bit granularity offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination operand are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The destination and first source operands are vector registers.
878///
879///
880/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTF128%3AVINSERTF32x4%3AVINSERTF64x2%3AVINSERTF32x8%3AVINSERTF64x4.html).
881///
882/// Supported operand variants:
883///
884/// ```text
885/// +---+--------------------+
886/// | # | Operands           |
887/// +---+--------------------+
888/// | 1 | Ymm, Ymm, Mem, Imm |
889/// | 2 | Ymm, Ymm, Xmm, Imm |
890/// +---+--------------------+
891/// ```
892pub trait Vinsertf128Emitter<A, B, C, D> {
893    fn vinsertf128(&mut self, op0: A, op1: B, op2: C, op3: D);
894}
895
896impl<'a> Vinsertf128Emitter<Ymm, Ymm, Xmm, Imm> for Assembler<'a> {
897    fn vinsertf128(&mut self, op0: Ymm, op1: Ymm, op2: Xmm, op3: Imm) {
898        self.emit(VINSERTF128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
899    }
900}
901
902impl<'a> Vinsertf128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
903    fn vinsertf128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
904        self.emit(VINSERTF128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
905    }
906}
907
908/// `VLDDQU` (VLDDQU). 
909/// The instruction is functionally similar to (V)MOVDQU ymm/xmm, m256/m128 for loading from memory. That is: 32/16 bytes of data starting at an address specified by the source memory operand (second operand) are fetched from memory and placed in a destination register (first operand). The source operand need not be aligned on a 32/16-byte boundary. Up to 64/32 bytes may be loaded from memory; this is implementation dependent.
910///
911///
912/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDDQU.html).
913///
914/// Supported operand variants:
915///
916/// ```text
917/// +---+----------+
918/// | # | Operands |
919/// +---+----------+
920/// | 1 | Xmm, Mem |
921/// | 2 | Ymm, Mem |
922/// +---+----------+
923/// ```
924pub trait VlddquEmitter<A, B> {
925    fn vlddqu(&mut self, op0: A, op1: B);
926}
927
928impl<'a> VlddquEmitter<Xmm, Mem> for Assembler<'a> {
929    fn vlddqu(&mut self, op0: Xmm, op1: Mem) {
930        self.emit(VLDDQU128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
931    }
932}
933
934impl<'a> VlddquEmitter<Ymm, Mem> for Assembler<'a> {
935    fn vlddqu(&mut self, op0: Ymm, op1: Mem) {
936        self.emit(VLDDQU256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
937    }
938}
939
940/// `VLDMXCSR` (VLDMXCSR). 
941/// Loads the source operand into the MXCSR control/status register. The source operand is a 32-bit memory location. See “MXCSR Control and Status Register” in Chapter 10, of the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for a description of the MXCSR register and its contents.
942///
943///
944/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
945///
946/// Supported operand variants:
947///
948/// ```text
949/// +---+----------+
950/// | # | Operands |
951/// +---+----------+
952/// | 1 | Mem      |
953/// +---+----------+
954/// ```
955pub trait VldmxcsrEmitter<A> {
956    fn vldmxcsr(&mut self, op0: A);
957}
958
959impl<'a> VldmxcsrEmitter<Mem> for Assembler<'a> {
960    fn vldmxcsr(&mut self, op0: Mem) {
961        self.emit(VLDMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
962    }
963}
964
965/// `VMASKMOVDQU` (VMASKMOVDQU). 
966/// Stores selected bytes from the source operand (first operand) into an 128-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are XMM registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
967///
968///
969/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVDQU.html).
970///
971/// Supported operand variants:
972///
973/// ```text
974/// +---+----------+
975/// | # | Operands |
976/// +---+----------+
977/// | 1 | Xmm, Xmm |
978/// +---+----------+
979/// ```
980pub trait VmaskmovdquEmitter<A, B> {
981    fn vmaskmovdqu(&mut self, op0: A, op1: B);
982}
983
984impl<'a> VmaskmovdquEmitter<Xmm, Xmm> for Assembler<'a> {
985    fn vmaskmovdqu(&mut self, op0: Xmm, op1: Xmm) {
986        self.emit(VMASKMOVDQU128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
987    }
988}
989
990/// `VMASKMOVPD` (VMASKMOVPD). 
991/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
992///
993///
994/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VMASKMOV.html).
995///
996/// Supported operand variants:
997///
998/// ```text
999/// +---+---------------+
1000/// | # | Operands      |
1001/// +---+---------------+
1002/// | 1 | Mem, Xmm, Xmm |
1003/// | 2 | Mem, Ymm, Ymm |
1004/// | 3 | Xmm, Xmm, Mem |
1005/// | 4 | Ymm, Ymm, Mem |
1006/// +---+---------------+
1007/// ```
1008pub trait VmaskmovpdEmitter<A, B, C> {
1009    fn vmaskmovpd(&mut self, op0: A, op1: B, op2: C);
1010}
1011
1012impl<'a> VmaskmovpdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1013    fn vmaskmovpd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1014        self.emit(VMASKMOVPD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1015    }
1016}
1017
1018impl<'a> VmaskmovpdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1019    fn vmaskmovpd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1020        self.emit(VMASKMOVPD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1021    }
1022}
1023
1024impl<'a> VmaskmovpdEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
1025    fn vmaskmovpd(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
1026        self.emit(VMASKMOVPD128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1027    }
1028}
1029
1030impl<'a> VmaskmovpdEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
1031    fn vmaskmovpd(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
1032        self.emit(VMASKMOVPD256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1033    }
1034}
1035
1036/// `VMASKMOVPS` (VMASKMOVPS). 
1037/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
1038///
1039///
1040/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VMASKMOV.html).
1041///
1042/// Supported operand variants:
1043///
1044/// ```text
1045/// +---+---------------+
1046/// | # | Operands      |
1047/// +---+---------------+
1048/// | 1 | Mem, Xmm, Xmm |
1049/// | 2 | Mem, Ymm, Ymm |
1050/// | 3 | Xmm, Xmm, Mem |
1051/// | 4 | Ymm, Ymm, Mem |
1052/// +---+---------------+
1053/// ```
1054pub trait VmaskmovpsEmitter<A, B, C> {
1055    fn vmaskmovps(&mut self, op0: A, op1: B, op2: C);
1056}
1057
1058impl<'a> VmaskmovpsEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1059    fn vmaskmovps(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1060        self.emit(VMASKMOVPS128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1061    }
1062}
1063
1064impl<'a> VmaskmovpsEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1065    fn vmaskmovps(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1066        self.emit(VMASKMOVPS256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1067    }
1068}
1069
1070impl<'a> VmaskmovpsEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
1071    fn vmaskmovps(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
1072        self.emit(VMASKMOVPS128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1073    }
1074}
1075
1076impl<'a> VmaskmovpsEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
1077    fn vmaskmovps(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
1078        self.emit(VMASKMOVPS256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1079    }
1080}
1081
1082/// `VMOVD` (VMOVD). 
1083/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1084///
1085///
1086/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1087///
1088/// Supported operand variants:
1089///
1090/// ```text
1091/// +---+----------+
1092/// | # | Operands |
1093/// +---+----------+
1094/// | 1 | Mem, Xmm |
1095/// | 2 | Xmm, Mem |
1096/// +---+----------+
1097/// ```
1098pub trait VmovdEmitter<A, B> {
1099    fn vmovd(&mut self, op0: A, op1: B);
1100}
1101
1102impl<'a> VmovdEmitter<Xmm, Mem> for Assembler<'a> {
1103    fn vmovd(&mut self, op0: Xmm, op1: Mem) {
1104        self.emit(VMOVDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1105    }
1106}
1107
1108impl<'a> VmovdEmitter<Mem, Xmm> for Assembler<'a> {
1109    fn vmovd(&mut self, op0: Mem, op1: Xmm) {
1110        self.emit(VMOVDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1111    }
1112}
1113
1114/// `VMOVDQA` (VMOVDQA). 
1115/// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
1116///
1117///
1118/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQA%3AVMOVDQA32%3AVMOVDQA64.html).
1119///
1120/// Supported operand variants:
1121///
1122/// ```text
1123/// +---+----------+
1124/// | # | Operands |
1125/// +---+----------+
1126/// | 1 | Mem, Xmm |
1127/// | 2 | Mem, Ymm |
1128/// | 3 | Xmm, Mem |
1129/// | 4 | Xmm, Xmm |
1130/// | 5 | Ymm, Mem |
1131/// | 6 | Ymm, Ymm |
1132/// +---+----------+
1133/// ```
1134pub trait VmovdqaEmitter<A, B> {
1135    fn vmovdqa(&mut self, op0: A, op1: B);
1136}
1137
1138impl<'a> VmovdqaEmitter<Xmm, Xmm> for Assembler<'a> {
1139    fn vmovdqa(&mut self, op0: Xmm, op1: Xmm) {
1140        self.emit(VMOVDQA128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1141    }
1142}
1143
1144impl<'a> VmovdqaEmitter<Xmm, Mem> for Assembler<'a> {
1145    fn vmovdqa(&mut self, op0: Xmm, op1: Mem) {
1146        self.emit(VMOVDQA128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1147    }
1148}
1149
1150impl<'a> VmovdqaEmitter<Ymm, Ymm> for Assembler<'a> {
1151    fn vmovdqa(&mut self, op0: Ymm, op1: Ymm) {
1152        self.emit(VMOVDQA256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1153    }
1154}
1155
1156impl<'a> VmovdqaEmitter<Ymm, Mem> for Assembler<'a> {
1157    fn vmovdqa(&mut self, op0: Ymm, op1: Mem) {
1158        self.emit(VMOVDQA256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1159    }
1160}
1161
1162impl<'a> VmovdqaEmitter<Mem, Xmm> for Assembler<'a> {
1163    fn vmovdqa(&mut self, op0: Mem, op1: Xmm) {
1164        self.emit(VMOVDQA128MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1165    }
1166}
1167
1168impl<'a> VmovdqaEmitter<Mem, Ymm> for Assembler<'a> {
1169    fn vmovdqa(&mut self, op0: Mem, op1: Ymm) {
1170        self.emit(VMOVDQA256MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1171    }
1172}
1173
1174/// `VMOVDQU` (VMOVDQU). 
1175/// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
1176///
1177///
1178/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQU%3AVMOVDQU8%3AVMOVDQU16%3AVMOVDQU32%3AVMOVDQU64.html).
1179///
1180/// Supported operand variants:
1181///
1182/// ```text
1183/// +---+----------+
1184/// | # | Operands |
1185/// +---+----------+
1186/// | 1 | Mem, Xmm |
1187/// | 2 | Mem, Ymm |
1188/// | 3 | Xmm, Mem |
1189/// | 4 | Xmm, Xmm |
1190/// | 5 | Ymm, Mem |
1191/// | 6 | Ymm, Ymm |
1192/// +---+----------+
1193/// ```
1194pub trait VmovdquEmitter<A, B> {
1195    fn vmovdqu(&mut self, op0: A, op1: B);
1196}
1197
1198impl<'a> VmovdquEmitter<Xmm, Xmm> for Assembler<'a> {
1199    fn vmovdqu(&mut self, op0: Xmm, op1: Xmm) {
1200        self.emit(VMOVDQU128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1201    }
1202}
1203
1204impl<'a> VmovdquEmitter<Xmm, Mem> for Assembler<'a> {
1205    fn vmovdqu(&mut self, op0: Xmm, op1: Mem) {
1206        self.emit(VMOVDQU128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1207    }
1208}
1209
1210impl<'a> VmovdquEmitter<Ymm, Ymm> for Assembler<'a> {
1211    fn vmovdqu(&mut self, op0: Ymm, op1: Ymm) {
1212        self.emit(VMOVDQU256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1213    }
1214}
1215
1216impl<'a> VmovdquEmitter<Ymm, Mem> for Assembler<'a> {
1217    fn vmovdqu(&mut self, op0: Ymm, op1: Mem) {
1218        self.emit(VMOVDQU256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1219    }
1220}
1221
1222impl<'a> VmovdquEmitter<Mem, Xmm> for Assembler<'a> {
1223    fn vmovdqu(&mut self, op0: Mem, op1: Xmm) {
1224        self.emit(VMOVDQU128MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1225    }
1226}
1227
1228impl<'a> VmovdquEmitter<Mem, Ymm> for Assembler<'a> {
1229    fn vmovdqu(&mut self, op0: Mem, op1: Ymm) {
1230        self.emit(VMOVDQU256MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1231    }
1232}
1233
1234/// `VMOVD_G2X` (VMOVD). 
1235/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1236///
1237///
1238/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1239///
1240/// Supported operand variants:
1241///
1242/// ```text
1243/// +---+----------+
1244/// | # | Operands |
1245/// +---+----------+
1246/// | 1 | Xmm, Gpd |
1247/// +---+----------+
1248/// ```
1249pub trait VmovdG2xEmitter<A, B> {
1250    fn vmovd_g2x(&mut self, op0: A, op1: B);
1251}
1252
1253impl<'a> VmovdG2xEmitter<Xmm, Gpd> for Assembler<'a> {
1254    fn vmovd_g2x(&mut self, op0: Xmm, op1: Gpd) {
1255        self.emit(VMOVD_G2XRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1256    }
1257}
1258
1259/// `VMOVD_X2G` (VMOVD). 
1260/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1261///
1262///
1263/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1264///
1265/// Supported operand variants:
1266///
1267/// ```text
1268/// +---+----------+
1269/// | # | Operands |
1270/// +---+----------+
1271/// | 1 | Gpd, Xmm |
1272/// +---+----------+
1273/// ```
1274pub trait VmovdX2gEmitter<A, B> {
1275    fn vmovd_x2g(&mut self, op0: A, op1: B);
1276}
1277
1278impl<'a> VmovdX2gEmitter<Gpd, Xmm> for Assembler<'a> {
1279    fn vmovd_x2g(&mut self, op0: Gpd, op1: Xmm) {
1280        self.emit(VMOVD_X2GRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1281    }
1282}
1283
1284/// `VMOVMSKPD` (VMOVMSKPD). 
1285/// Extracts the sign bits from the packed double precision floating-point values in the source operand (second operand), formats them into a 2-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM register, and the destination operand is a general-purpose register. The mask is stored in the 2 low-order bits of the destination operand. Zero-extend the upper bits of the destination.
1286///
1287///
1288/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPD.html).
1289///
1290/// Supported operand variants:
1291///
1292/// ```text
1293/// +---+----------+
1294/// | # | Operands |
1295/// +---+----------+
1296/// | 1 | Gpd, Xmm |
1297/// | 2 | Gpd, Ymm |
1298/// +---+----------+
1299/// ```
1300pub trait VmovmskpdEmitter<A, B> {
1301    fn vmovmskpd(&mut self, op0: A, op1: B);
1302}
1303
1304impl<'a> VmovmskpdEmitter<Gpd, Xmm> for Assembler<'a> {
1305    fn vmovmskpd(&mut self, op0: Gpd, op1: Xmm) {
1306        self.emit(VMOVMSKPD128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1307    }
1308}
1309
1310impl<'a> VmovmskpdEmitter<Gpd, Ymm> for Assembler<'a> {
1311    fn vmovmskpd(&mut self, op0: Gpd, op1: Ymm) {
1312        self.emit(VMOVMSKPD256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1313    }
1314}
1315
1316/// `VMOVMSKPS` (VMOVMSKPS). 
1317/// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
1318///
1319///
1320/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
1321///
1322/// Supported operand variants:
1323///
1324/// ```text
1325/// +---+----------+
1326/// | # | Operands |
1327/// +---+----------+
1328/// | 1 | Gpd, Xmm |
1329/// | 2 | Gpd, Ymm |
1330/// +---+----------+
1331/// ```
1332pub trait VmovmskpsEmitter<A, B> {
1333    fn vmovmskps(&mut self, op0: A, op1: B);
1334}
1335
1336impl<'a> VmovmskpsEmitter<Gpd, Xmm> for Assembler<'a> {
1337    fn vmovmskps(&mut self, op0: Gpd, op1: Xmm) {
1338        self.emit(VMOVMSKPS128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1339    }
1340}
1341
1342impl<'a> VmovmskpsEmitter<Gpd, Ymm> for Assembler<'a> {
1343    fn vmovmskps(&mut self, op0: Gpd, op1: Ymm) {
1344        self.emit(VMOVMSKPS256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1345    }
1346}
1347
1348/// `VMOVQ_G2X` (VMOVQ). 
1349/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1350///
1351///
1352/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1353///
1354/// Supported operand variants:
1355///
1356/// ```text
1357/// +---+----------+
1358/// | # | Operands |
1359/// +---+----------+
1360/// | 1 | Xmm, Gpd |
1361/// | 2 | Xmm, Gpq |
1362/// | 3 | Xmm, Mem |
1363/// +---+----------+
1364/// ```
1365pub trait VmovqG2xEmitter<A, B> {
1366    fn vmovq_g2x(&mut self, op0: A, op1: B);
1367}
1368
1369impl<'a> VmovqG2xEmitter<Xmm, Gpd> for Assembler<'a> {
1370    fn vmovq_g2x(&mut self, op0: Xmm, op1: Gpd) {
1371        self.emit(VMOVQ_G2XRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1372    }
1373}
1374
1375impl<'a> VmovqG2xEmitter<Xmm, Mem> for Assembler<'a> {
1376    fn vmovq_g2x(&mut self, op0: Xmm, op1: Mem) {
1377        self.emit(VMOVQ_G2XRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1378    }
1379}
1380
1381impl<'a> VmovqG2xEmitter<Xmm, Gpq> for Assembler<'a> {
1382    fn vmovq_g2x(&mut self, op0: Xmm, op1: Gpq) {
1383        self.emit(VMOVQ_G2XRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1384    }
1385}
1386
1387/// `VMOVQ_X2G` (VMOVQ). 
1388/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1389///
1390///
1391/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1392///
1393/// Supported operand variants:
1394///
1395/// ```text
1396/// +---+----------+
1397/// | # | Operands |
1398/// +---+----------+
1399/// | 1 | Gpd, Xmm |
1400/// | 2 | Gpq, Xmm |
1401/// | 3 | Mem, Xmm |
1402/// +---+----------+
1403/// ```
1404pub trait VmovqX2gEmitter<A, B> {
1405    fn vmovq_x2g(&mut self, op0: A, op1: B);
1406}
1407
1408impl<'a> VmovqX2gEmitter<Gpd, Xmm> for Assembler<'a> {
1409    fn vmovq_x2g(&mut self, op0: Gpd, op1: Xmm) {
1410        self.emit(VMOVQ_X2GRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1411    }
1412}
1413
1414impl<'a> VmovqX2gEmitter<Mem, Xmm> for Assembler<'a> {
1415    fn vmovq_x2g(&mut self, op0: Mem, op1: Xmm) {
1416        self.emit(VMOVQ_X2GMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1417    }
1418}
1419
1420impl<'a> VmovqX2gEmitter<Gpq, Xmm> for Assembler<'a> {
1421    fn vmovq_x2g(&mut self, op0: Gpq, op1: Xmm) {
1422        self.emit(VMOVQ_X2GRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1423    }
1424}
1425
1426/// `VMPSADBW` (VMPSADBW). 
1427/// (V)MPSADBW calculates packed word results of sum-absolute-difference (SAD) of unsigned bytes from two blocks of 32-bit dword elements, using two select fields in the immediate byte to select the offsets of the two blocks within the first source operand and the second operand. Packed SAD word results are calculated within each 128-bit lane. Each SAD word result is calculated between a stationary block_2 (whose offset within the second source operand is selected by a two bit select control, multiplied by 32 bits) and a sliding block_1 at consecutive byte-granular position within the first source operand. The offset of the first 32-bit block of block_1 is selectable using a one bit select control, multiplied by 32 bits.
1428///
1429///
1430/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MPSADBW.html).
1431///
1432/// Supported operand variants:
1433///
1434/// ```text
1435/// +---+--------------------+
1436/// | # | Operands           |
1437/// +---+--------------------+
1438/// | 1 | Xmm, Xmm, Mem, Imm |
1439/// | 2 | Xmm, Xmm, Xmm, Imm |
1440/// | 3 | Ymm, Ymm, Mem, Imm |
1441/// | 4 | Ymm, Ymm, Ymm, Imm |
1442/// +---+--------------------+
1443/// ```
1444pub trait VmpsadbwEmitter<A, B, C, D> {
1445    fn vmpsadbw(&mut self, op0: A, op1: B, op2: C, op3: D);
1446}
1447
1448impl<'a> VmpsadbwEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
1449    fn vmpsadbw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
1450        self.emit(VMPSADBW128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1451    }
1452}
1453
1454impl<'a> VmpsadbwEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
1455    fn vmpsadbw(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
1456        self.emit(VMPSADBW128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1457    }
1458}
1459
1460impl<'a> VmpsadbwEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
1461    fn vmpsadbw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
1462        self.emit(VMPSADBW256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1463    }
1464}
1465
1466impl<'a> VmpsadbwEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
1467    fn vmpsadbw(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
1468        self.emit(VMPSADBW256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1469    }
1470}
1471
1472/// `VPAND` (VPAND). 
1473/// Performs a bitwise logical AND operation on the first source operand and second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bits of the first and second operands are 1, otherwise it is set to 0.
1474///
1475///
1476/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAND.html).
1477///
1478/// Supported operand variants:
1479///
1480/// ```text
1481/// +---+---------------+
1482/// | # | Operands      |
1483/// +---+---------------+
1484/// | 1 | Xmm, Xmm, Mem |
1485/// | 2 | Xmm, Xmm, Xmm |
1486/// | 3 | Ymm, Ymm, Mem |
1487/// | 4 | Ymm, Ymm, Ymm |
1488/// +---+---------------+
1489/// ```
1490pub trait VpandEmitter<A, B, C> {
1491    fn vpand(&mut self, op0: A, op1: B, op2: C);
1492}
1493
1494impl<'a> VpandEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1495    fn vpand(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1496        self.emit(VPAND128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1497    }
1498}
1499
1500impl<'a> VpandEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1501    fn vpand(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1502        self.emit(VPAND128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1503    }
1504}
1505
1506impl<'a> VpandEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1507    fn vpand(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1508        self.emit(VPAND256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1509    }
1510}
1511
1512impl<'a> VpandEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1513    fn vpand(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1514        self.emit(VPAND256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1515    }
1516}
1517
1518/// `VPANDN` (VPANDN). 
1519/// Performs a bitwise logical NOT operation on the first source operand, then performs bitwise AND with second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bit in the first operand is 0 and the corresponding bit in the second operand is 1, otherwise it is set to 0.
1520///
1521///
1522/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PANDN.html).
1523///
1524/// Supported operand variants:
1525///
1526/// ```text
1527/// +---+---------------+
1528/// | # | Operands      |
1529/// +---+---------------+
1530/// | 1 | Xmm, Xmm, Mem |
1531/// | 2 | Xmm, Xmm, Xmm |
1532/// | 3 | Ymm, Ymm, Mem |
1533/// | 4 | Ymm, Ymm, Ymm |
1534/// +---+---------------+
1535/// ```
1536pub trait VpandnEmitter<A, B, C> {
1537    fn vpandn(&mut self, op0: A, op1: B, op2: C);
1538}
1539
1540impl<'a> VpandnEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1541    fn vpandn(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1542        self.emit(VPANDN128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1543    }
1544}
1545
1546impl<'a> VpandnEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1547    fn vpandn(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1548        self.emit(VPANDN128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1549    }
1550}
1551
1552impl<'a> VpandnEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1553    fn vpandn(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1554        self.emit(VPANDN256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1555    }
1556}
1557
1558impl<'a> VpandnEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1559    fn vpandn(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1560        self.emit(VPANDN256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1561    }
1562}
1563
1564/// `VPBLENDVB` (VPBLENDVB). 
1565/// Conditionally copies byte elements from the source operand (second operand) to the destination operand (first operand) depending on mask bits defined in the implicit third register argument, XMM0. The mask bits are the most significant bit in each byte element of the XMM0 register.
1566///
1567///
1568/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PBLENDVB.html).
1569///
1570/// Supported operand variants:
1571///
1572/// ```text
1573/// +---+--------------------+
1574/// | # | Operands           |
1575/// +---+--------------------+
1576/// | 1 | Xmm, Xmm, Mem, Xmm |
1577/// | 2 | Xmm, Xmm, Xmm, Xmm |
1578/// | 3 | Ymm, Ymm, Mem, Ymm |
1579/// | 4 | Ymm, Ymm, Ymm, Ymm |
1580/// +---+--------------------+
1581/// ```
1582pub trait VpblendvbEmitter<A, B, C, D> {
1583    fn vpblendvb(&mut self, op0: A, op1: B, op2: C, op3: D);
1584}
1585
1586impl<'a> VpblendvbEmitter<Xmm, Xmm, Xmm, Xmm> for Assembler<'a> {
1587    fn vpblendvb(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Xmm) {
1588        self.emit(VPBLENDVB128RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1589    }
1590}
1591
1592impl<'a> VpblendvbEmitter<Xmm, Xmm, Mem, Xmm> for Assembler<'a> {
1593    fn vpblendvb(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Xmm) {
1594        self.emit(VPBLENDVB128RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1595    }
1596}
1597
1598impl<'a> VpblendvbEmitter<Ymm, Ymm, Ymm, Ymm> for Assembler<'a> {
1599    fn vpblendvb(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Ymm) {
1600        self.emit(VPBLENDVB256RRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1601    }
1602}
1603
1604impl<'a> VpblendvbEmitter<Ymm, Ymm, Mem, Ymm> for Assembler<'a> {
1605    fn vpblendvb(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Ymm) {
1606        self.emit(VPBLENDVB256RRMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1607    }
1608}
1609
1610/// `VPBLENDW` (VPBLENDW). 
1611/// Words from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding word in the destination is copied from the source. If a bit in the mask, corresponding to a word, is “1", then the word is copied, else the word element in the destination operand is unchanged.
1612///
1613///
1614/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PBLENDW.html).
1615///
1616/// Supported operand variants:
1617///
1618/// ```text
1619/// +---+--------------------+
1620/// | # | Operands           |
1621/// +---+--------------------+
1622/// | 1 | Xmm, Xmm, Mem, Imm |
1623/// | 2 | Xmm, Xmm, Xmm, Imm |
1624/// | 3 | Ymm, Ymm, Mem, Imm |
1625/// | 4 | Ymm, Ymm, Ymm, Imm |
1626/// +---+--------------------+
1627/// ```
1628pub trait VpblendwEmitter<A, B, C, D> {
1629    fn vpblendw(&mut self, op0: A, op1: B, op2: C, op3: D);
1630}
1631
1632impl<'a> VpblendwEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
1633    fn vpblendw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
1634        self.emit(VPBLENDW128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1635    }
1636}
1637
1638impl<'a> VpblendwEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
1639    fn vpblendw(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
1640        self.emit(VPBLENDW128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1641    }
1642}
1643
1644impl<'a> VpblendwEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
1645    fn vpblendw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
1646        self.emit(VPBLENDW256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1647    }
1648}
1649
1650impl<'a> VpblendwEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
1651    fn vpblendw(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
1652        self.emit(VPBLENDW256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
1653    }
1654}
1655
1656/// `VPCMPEQB` (VPCMPEQB). 
1657/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
1658///
1659///
1660/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
1661///
1662/// Supported operand variants:
1663///
1664/// ```text
1665/// +----+----------------+
1666/// | #  | Operands       |
1667/// +----+----------------+
1668/// | 1  | KReg, Xmm, Mem |
1669/// | 2  | KReg, Xmm, Xmm |
1670/// | 3  | KReg, Ymm, Mem |
1671/// | 4  | KReg, Ymm, Ymm |
1672/// | 5  | KReg, Zmm, Mem |
1673/// | 6  | KReg, Zmm, Zmm |
1674/// | 7  | Xmm, Xmm, Mem  |
1675/// | 8  | Xmm, Xmm, Xmm  |
1676/// | 9  | Ymm, Ymm, Mem  |
1677/// | 10 | Ymm, Ymm, Ymm  |
1678/// +----+----------------+
1679/// ```
1680pub trait VpcmpeqbEmitter<A, B, C> {
1681    fn vpcmpeqb(&mut self, op0: A, op1: B, op2: C);
1682}
1683
1684impl<'a> VpcmpeqbEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1685    fn vpcmpeqb(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1686        self.emit(VPCMPEQB128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1687    }
1688}
1689
1690impl<'a> VpcmpeqbEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1691    fn vpcmpeqb(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1692        self.emit(VPCMPEQB128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1693    }
1694}
1695
1696impl<'a> VpcmpeqbEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1697    fn vpcmpeqb(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1698        self.emit(VPCMPEQB256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1699    }
1700}
1701
1702impl<'a> VpcmpeqbEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1703    fn vpcmpeqb(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1704        self.emit(VPCMPEQB256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1705    }
1706}
1707
1708impl<'a> VpcmpeqbEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
1709    fn vpcmpeqb(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
1710        self.emit(VPCMPEQB128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1711    }
1712}
1713
1714impl<'a> VpcmpeqbEmitter<KReg, Xmm, Mem> for Assembler<'a> {
1715    fn vpcmpeqb(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
1716        self.emit(VPCMPEQB128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1717    }
1718}
1719
1720impl<'a> VpcmpeqbEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
1721    fn vpcmpeqb(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
1722        self.emit(VPCMPEQB256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1723    }
1724}
1725
1726impl<'a> VpcmpeqbEmitter<KReg, Ymm, Mem> for Assembler<'a> {
1727    fn vpcmpeqb(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
1728        self.emit(VPCMPEQB256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1729    }
1730}
1731
1732impl<'a> VpcmpeqbEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
1733    fn vpcmpeqb(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
1734        self.emit(VPCMPEQB512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1735    }
1736}
1737
1738impl<'a> VpcmpeqbEmitter<KReg, Zmm, Mem> for Assembler<'a> {
1739    fn vpcmpeqb(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
1740        self.emit(VPCMPEQB512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1741    }
1742}
1743
1744/// `VPCMPEQD` (VPCMPEQD). 
1745/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
1746///
1747///
1748/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
1749///
1750/// Supported operand variants:
1751///
1752/// ```text
1753/// +----+----------------+
1754/// | #  | Operands       |
1755/// +----+----------------+
1756/// | 1  | KReg, Xmm, Mem |
1757/// | 2  | KReg, Xmm, Xmm |
1758/// | 3  | KReg, Ymm, Mem |
1759/// | 4  | KReg, Ymm, Ymm |
1760/// | 5  | KReg, Zmm, Mem |
1761/// | 6  | KReg, Zmm, Zmm |
1762/// | 7  | Xmm, Xmm, Mem  |
1763/// | 8  | Xmm, Xmm, Xmm  |
1764/// | 9  | Ymm, Ymm, Mem  |
1765/// | 10 | Ymm, Ymm, Ymm  |
1766/// +----+----------------+
1767/// ```
1768pub trait VpcmpeqdEmitter<A, B, C> {
1769    fn vpcmpeqd(&mut self, op0: A, op1: B, op2: C);
1770}
1771
1772impl<'a> VpcmpeqdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1773    fn vpcmpeqd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1774        self.emit(VPCMPEQD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1775    }
1776}
1777
1778impl<'a> VpcmpeqdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1779    fn vpcmpeqd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1780        self.emit(VPCMPEQD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1781    }
1782}
1783
1784impl<'a> VpcmpeqdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1785    fn vpcmpeqd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1786        self.emit(VPCMPEQD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1787    }
1788}
1789
1790impl<'a> VpcmpeqdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1791    fn vpcmpeqd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1792        self.emit(VPCMPEQD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1793    }
1794}
1795
1796impl<'a> VpcmpeqdEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
1797    fn vpcmpeqd(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
1798        self.emit(VPCMPEQD128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1799    }
1800}
1801
1802impl<'a> VpcmpeqdEmitter<KReg, Xmm, Mem> for Assembler<'a> {
1803    fn vpcmpeqd(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
1804        self.emit(VPCMPEQD128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1805    }
1806}
1807
1808impl<'a> VpcmpeqdEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
1809    fn vpcmpeqd(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
1810        self.emit(VPCMPEQD256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1811    }
1812}
1813
1814impl<'a> VpcmpeqdEmitter<KReg, Ymm, Mem> for Assembler<'a> {
1815    fn vpcmpeqd(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
1816        self.emit(VPCMPEQD256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1817    }
1818}
1819
1820impl<'a> VpcmpeqdEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
1821    fn vpcmpeqd(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
1822        self.emit(VPCMPEQD512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1823    }
1824}
1825
1826impl<'a> VpcmpeqdEmitter<KReg, Zmm, Mem> for Assembler<'a> {
1827    fn vpcmpeqd(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
1828        self.emit(VPCMPEQD512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1829    }
1830}
1831
1832/// `VPCMPEQQ` (VPCMPEQQ). 
1833/// Performs an SIMD compare for equality of the packed quadwords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination is set to all 1s; otherwise, it is set to 0s.
1834///
1835///
1836/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQQ.html).
1837///
1838/// Supported operand variants:
1839///
1840/// ```text
1841/// +----+----------------+
1842/// | #  | Operands       |
1843/// +----+----------------+
1844/// | 1  | KReg, Xmm, Mem |
1845/// | 2  | KReg, Xmm, Xmm |
1846/// | 3  | KReg, Ymm, Mem |
1847/// | 4  | KReg, Ymm, Ymm |
1848/// | 5  | KReg, Zmm, Mem |
1849/// | 6  | KReg, Zmm, Zmm |
1850/// | 7  | Xmm, Xmm, Mem  |
1851/// | 8  | Xmm, Xmm, Xmm  |
1852/// | 9  | Ymm, Ymm, Mem  |
1853/// | 10 | Ymm, Ymm, Ymm  |
1854/// +----+----------------+
1855/// ```
1856pub trait VpcmpeqqEmitter<A, B, C> {
1857    fn vpcmpeqq(&mut self, op0: A, op1: B, op2: C);
1858}
1859
1860impl<'a> VpcmpeqqEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1861    fn vpcmpeqq(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1862        self.emit(VPCMPEQQ128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1863    }
1864}
1865
1866impl<'a> VpcmpeqqEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1867    fn vpcmpeqq(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1868        self.emit(VPCMPEQQ128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1869    }
1870}
1871
1872impl<'a> VpcmpeqqEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1873    fn vpcmpeqq(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1874        self.emit(VPCMPEQQ256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1875    }
1876}
1877
1878impl<'a> VpcmpeqqEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1879    fn vpcmpeqq(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1880        self.emit(VPCMPEQQ256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1881    }
1882}
1883
1884impl<'a> VpcmpeqqEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
1885    fn vpcmpeqq(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
1886        self.emit(VPCMPEQQ128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1887    }
1888}
1889
1890impl<'a> VpcmpeqqEmitter<KReg, Xmm, Mem> for Assembler<'a> {
1891    fn vpcmpeqq(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
1892        self.emit(VPCMPEQQ128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1893    }
1894}
1895
1896impl<'a> VpcmpeqqEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
1897    fn vpcmpeqq(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
1898        self.emit(VPCMPEQQ256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1899    }
1900}
1901
1902impl<'a> VpcmpeqqEmitter<KReg, Ymm, Mem> for Assembler<'a> {
1903    fn vpcmpeqq(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
1904        self.emit(VPCMPEQQ256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1905    }
1906}
1907
1908impl<'a> VpcmpeqqEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
1909    fn vpcmpeqq(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
1910        self.emit(VPCMPEQQ512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1911    }
1912}
1913
1914impl<'a> VpcmpeqqEmitter<KReg, Zmm, Mem> for Assembler<'a> {
1915    fn vpcmpeqq(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
1916        self.emit(VPCMPEQQ512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1917    }
1918}
1919
1920/// `VPCMPEQW` (VPCMPEQW). 
1921/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
1922///
1923///
1924/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
1925///
1926/// Supported operand variants:
1927///
1928/// ```text
1929/// +----+----------------+
1930/// | #  | Operands       |
1931/// +----+----------------+
1932/// | 1  | KReg, Xmm, Mem |
1933/// | 2  | KReg, Xmm, Xmm |
1934/// | 3  | KReg, Ymm, Mem |
1935/// | 4  | KReg, Ymm, Ymm |
1936/// | 5  | KReg, Zmm, Mem |
1937/// | 6  | KReg, Zmm, Zmm |
1938/// | 7  | Xmm, Xmm, Mem  |
1939/// | 8  | Xmm, Xmm, Xmm  |
1940/// | 9  | Ymm, Ymm, Mem  |
1941/// | 10 | Ymm, Ymm, Ymm  |
1942/// +----+----------------+
1943/// ```
1944pub trait VpcmpeqwEmitter<A, B, C> {
1945    fn vpcmpeqw(&mut self, op0: A, op1: B, op2: C);
1946}
1947
1948impl<'a> VpcmpeqwEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
1949    fn vpcmpeqw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
1950        self.emit(VPCMPEQW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1951    }
1952}
1953
1954impl<'a> VpcmpeqwEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
1955    fn vpcmpeqw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
1956        self.emit(VPCMPEQW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1957    }
1958}
1959
1960impl<'a> VpcmpeqwEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
1961    fn vpcmpeqw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
1962        self.emit(VPCMPEQW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1963    }
1964}
1965
1966impl<'a> VpcmpeqwEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
1967    fn vpcmpeqw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
1968        self.emit(VPCMPEQW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1969    }
1970}
1971
1972impl<'a> VpcmpeqwEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
1973    fn vpcmpeqw(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
1974        self.emit(VPCMPEQW128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1975    }
1976}
1977
1978impl<'a> VpcmpeqwEmitter<KReg, Xmm, Mem> for Assembler<'a> {
1979    fn vpcmpeqw(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
1980        self.emit(VPCMPEQW128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1981    }
1982}
1983
1984impl<'a> VpcmpeqwEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
1985    fn vpcmpeqw(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
1986        self.emit(VPCMPEQW256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1987    }
1988}
1989
1990impl<'a> VpcmpeqwEmitter<KReg, Ymm, Mem> for Assembler<'a> {
1991    fn vpcmpeqw(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
1992        self.emit(VPCMPEQW256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1993    }
1994}
1995
1996impl<'a> VpcmpeqwEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
1997    fn vpcmpeqw(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
1998        self.emit(VPCMPEQW512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1999    }
2000}
2001
2002impl<'a> VpcmpeqwEmitter<KReg, Zmm, Mem> for Assembler<'a> {
2003    fn vpcmpeqw(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
2004        self.emit(VPCMPEQW512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2005    }
2006}
2007
2008/// `VPCMPESTRI` (VPCMPESTRI). 
2009/// The instruction compares and processes data from two string fragments based on the encoded value in the imm8 control byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates an index stored to the count register (ECX).
2010///
2011///
2012/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPESTRI.html).
2013///
2014/// Supported operand variants:
2015///
2016/// ```text
2017/// +---+---------------+
2018/// | # | Operands      |
2019/// +---+---------------+
2020/// | 1 | Xmm, Mem, Imm |
2021/// | 2 | Xmm, Xmm, Imm |
2022/// +---+---------------+
2023/// ```
2024pub trait VpcmpestriEmitter<A, B, C> {
2025    fn vpcmpestri(&mut self, op0: A, op1: B, op2: C);
2026}
2027
2028impl<'a> VpcmpestriEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2029    fn vpcmpestri(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2030        self.emit(VPCMPESTRIRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2031    }
2032}
2033
2034impl<'a> VpcmpestriEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2035    fn vpcmpestri(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2036        self.emit(VPCMPESTRIRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2037    }
2038}
2039
2040/// `VPCMPESTRM` (VPCMPESTRM). 
2041/// The instruction compares data from two string fragments based on the encoded value in the imm8 contol byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates a mask stored to XMM0.
2042///
2043///
2044/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPESTRM.html).
2045///
2046/// Supported operand variants:
2047///
2048/// ```text
2049/// +---+---------------+
2050/// | # | Operands      |
2051/// +---+---------------+
2052/// | 1 | Xmm, Mem, Imm |
2053/// | 2 | Xmm, Xmm, Imm |
2054/// +---+---------------+
2055/// ```
2056pub trait VpcmpestrmEmitter<A, B, C> {
2057    fn vpcmpestrm(&mut self, op0: A, op1: B, op2: C);
2058}
2059
2060impl<'a> VpcmpestrmEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2061    fn vpcmpestrm(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2062        self.emit(VPCMPESTRMRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2063    }
2064}
2065
2066impl<'a> VpcmpestrmEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2067    fn vpcmpestrm(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2068        self.emit(VPCMPESTRMRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2069    }
2070}
2071
2072/// `VPCMPGTB` (VPCMPGTB). 
2073/// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2074///
2075///
2076/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2077///
2078/// Supported operand variants:
2079///
2080/// ```text
2081/// +----+----------------+
2082/// | #  | Operands       |
2083/// +----+----------------+
2084/// | 1  | KReg, Xmm, Mem |
2085/// | 2  | KReg, Xmm, Xmm |
2086/// | 3  | KReg, Ymm, Mem |
2087/// | 4  | KReg, Ymm, Ymm |
2088/// | 5  | KReg, Zmm, Mem |
2089/// | 6  | KReg, Zmm, Zmm |
2090/// | 7  | Xmm, Xmm, Mem  |
2091/// | 8  | Xmm, Xmm, Xmm  |
2092/// | 9  | Ymm, Ymm, Mem  |
2093/// | 10 | Ymm, Ymm, Ymm  |
2094/// +----+----------------+
2095/// ```
2096pub trait VpcmpgtbEmitter<A, B, C> {
2097    fn vpcmpgtb(&mut self, op0: A, op1: B, op2: C);
2098}
2099
2100impl<'a> VpcmpgtbEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2101    fn vpcmpgtb(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2102        self.emit(VPCMPGTB128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2103    }
2104}
2105
2106impl<'a> VpcmpgtbEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2107    fn vpcmpgtb(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2108        self.emit(VPCMPGTB128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2109    }
2110}
2111
2112impl<'a> VpcmpgtbEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2113    fn vpcmpgtb(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2114        self.emit(VPCMPGTB256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2115    }
2116}
2117
2118impl<'a> VpcmpgtbEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2119    fn vpcmpgtb(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2120        self.emit(VPCMPGTB256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2121    }
2122}
2123
2124impl<'a> VpcmpgtbEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
2125    fn vpcmpgtb(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
2126        self.emit(VPCMPGTB128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2127    }
2128}
2129
2130impl<'a> VpcmpgtbEmitter<KReg, Xmm, Mem> for Assembler<'a> {
2131    fn vpcmpgtb(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
2132        self.emit(VPCMPGTB128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2133    }
2134}
2135
2136impl<'a> VpcmpgtbEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
2137    fn vpcmpgtb(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
2138        self.emit(VPCMPGTB256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2139    }
2140}
2141
2142impl<'a> VpcmpgtbEmitter<KReg, Ymm, Mem> for Assembler<'a> {
2143    fn vpcmpgtb(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
2144        self.emit(VPCMPGTB256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2145    }
2146}
2147
2148impl<'a> VpcmpgtbEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
2149    fn vpcmpgtb(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
2150        self.emit(VPCMPGTB512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2151    }
2152}
2153
2154impl<'a> VpcmpgtbEmitter<KReg, Zmm, Mem> for Assembler<'a> {
2155    fn vpcmpgtb(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
2156        self.emit(VPCMPGTB512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2157    }
2158}
2159
2160/// `VPCMPGTD` (VPCMPGTD). 
2161/// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2162///
2163///
2164/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2165///
2166/// Supported operand variants:
2167///
2168/// ```text
2169/// +----+----------------+
2170/// | #  | Operands       |
2171/// +----+----------------+
2172/// | 1  | KReg, Xmm, Mem |
2173/// | 2  | KReg, Xmm, Xmm |
2174/// | 3  | KReg, Ymm, Mem |
2175/// | 4  | KReg, Ymm, Ymm |
2176/// | 5  | KReg, Zmm, Mem |
2177/// | 6  | KReg, Zmm, Zmm |
2178/// | 7  | Xmm, Xmm, Mem  |
2179/// | 8  | Xmm, Xmm, Xmm  |
2180/// | 9  | Ymm, Ymm, Mem  |
2181/// | 10 | Ymm, Ymm, Ymm  |
2182/// +----+----------------+
2183/// ```
2184pub trait VpcmpgtdEmitter<A, B, C> {
2185    fn vpcmpgtd(&mut self, op0: A, op1: B, op2: C);
2186}
2187
2188impl<'a> VpcmpgtdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2189    fn vpcmpgtd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2190        self.emit(VPCMPGTD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2191    }
2192}
2193
2194impl<'a> VpcmpgtdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2195    fn vpcmpgtd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2196        self.emit(VPCMPGTD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2197    }
2198}
2199
2200impl<'a> VpcmpgtdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2201    fn vpcmpgtd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2202        self.emit(VPCMPGTD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2203    }
2204}
2205
2206impl<'a> VpcmpgtdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2207    fn vpcmpgtd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2208        self.emit(VPCMPGTD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2209    }
2210}
2211
2212impl<'a> VpcmpgtdEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
2213    fn vpcmpgtd(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
2214        self.emit(VPCMPGTD128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2215    }
2216}
2217
2218impl<'a> VpcmpgtdEmitter<KReg, Xmm, Mem> for Assembler<'a> {
2219    fn vpcmpgtd(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
2220        self.emit(VPCMPGTD128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2221    }
2222}
2223
2224impl<'a> VpcmpgtdEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
2225    fn vpcmpgtd(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
2226        self.emit(VPCMPGTD256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2227    }
2228}
2229
2230impl<'a> VpcmpgtdEmitter<KReg, Ymm, Mem> for Assembler<'a> {
2231    fn vpcmpgtd(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
2232        self.emit(VPCMPGTD256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2233    }
2234}
2235
2236impl<'a> VpcmpgtdEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
2237    fn vpcmpgtd(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
2238        self.emit(VPCMPGTD512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2239    }
2240}
2241
2242impl<'a> VpcmpgtdEmitter<KReg, Zmm, Mem> for Assembler<'a> {
2243    fn vpcmpgtd(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
2244        self.emit(VPCMPGTD512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2245    }
2246}
2247
2248/// `VPCMPGTQ` (VPCMPGTQ). 
2249/// Performs an SIMD signed compare for the packed quadwords in the destination operand (first operand) and the source operand (second operand). If the data element in the first (destination) operand is greater than the corresponding element in the second (source) operand, the corresponding data element in the destination is set to all 1s; otherwise, it is set to 0s.
2250///
2251///
2252/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTQ.html).
2253///
2254/// Supported operand variants:
2255///
2256/// ```text
2257/// +----+----------------+
2258/// | #  | Operands       |
2259/// +----+----------------+
2260/// | 1  | KReg, Xmm, Mem |
2261/// | 2  | KReg, Xmm, Xmm |
2262/// | 3  | KReg, Ymm, Mem |
2263/// | 4  | KReg, Ymm, Ymm |
2264/// | 5  | KReg, Zmm, Mem |
2265/// | 6  | KReg, Zmm, Zmm |
2266/// | 7  | Xmm, Xmm, Mem  |
2267/// | 8  | Xmm, Xmm, Xmm  |
2268/// | 9  | Ymm, Ymm, Mem  |
2269/// | 10 | Ymm, Ymm, Ymm  |
2270/// +----+----------------+
2271/// ```
2272pub trait VpcmpgtqEmitter<A, B, C> {
2273    fn vpcmpgtq(&mut self, op0: A, op1: B, op2: C);
2274}
2275
2276impl<'a> VpcmpgtqEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2277    fn vpcmpgtq(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2278        self.emit(VPCMPGTQ128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2279    }
2280}
2281
2282impl<'a> VpcmpgtqEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2283    fn vpcmpgtq(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2284        self.emit(VPCMPGTQ128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2285    }
2286}
2287
2288impl<'a> VpcmpgtqEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2289    fn vpcmpgtq(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2290        self.emit(VPCMPGTQ256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2291    }
2292}
2293
2294impl<'a> VpcmpgtqEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2295    fn vpcmpgtq(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2296        self.emit(VPCMPGTQ256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2297    }
2298}
2299
2300impl<'a> VpcmpgtqEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
2301    fn vpcmpgtq(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
2302        self.emit(VPCMPGTQ128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2303    }
2304}
2305
2306impl<'a> VpcmpgtqEmitter<KReg, Xmm, Mem> for Assembler<'a> {
2307    fn vpcmpgtq(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
2308        self.emit(VPCMPGTQ128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2309    }
2310}
2311
2312impl<'a> VpcmpgtqEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
2313    fn vpcmpgtq(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
2314        self.emit(VPCMPGTQ256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2315    }
2316}
2317
2318impl<'a> VpcmpgtqEmitter<KReg, Ymm, Mem> for Assembler<'a> {
2319    fn vpcmpgtq(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
2320        self.emit(VPCMPGTQ256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2321    }
2322}
2323
2324impl<'a> VpcmpgtqEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
2325    fn vpcmpgtq(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
2326        self.emit(VPCMPGTQ512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2327    }
2328}
2329
2330impl<'a> VpcmpgtqEmitter<KReg, Zmm, Mem> for Assembler<'a> {
2331    fn vpcmpgtq(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
2332        self.emit(VPCMPGTQ512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2333    }
2334}
2335
2336/// `VPCMPGTW` (VPCMPGTW). 
2337/// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2338///
2339///
2340/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2341///
2342/// Supported operand variants:
2343///
2344/// ```text
2345/// +----+----------------+
2346/// | #  | Operands       |
2347/// +----+----------------+
2348/// | 1  | KReg, Xmm, Mem |
2349/// | 2  | KReg, Xmm, Xmm |
2350/// | 3  | KReg, Ymm, Mem |
2351/// | 4  | KReg, Ymm, Ymm |
2352/// | 5  | KReg, Zmm, Mem |
2353/// | 6  | KReg, Zmm, Zmm |
2354/// | 7  | Xmm, Xmm, Mem  |
2355/// | 8  | Xmm, Xmm, Xmm  |
2356/// | 9  | Ymm, Ymm, Mem  |
2357/// | 10 | Ymm, Ymm, Ymm  |
2358/// +----+----------------+
2359/// ```
2360pub trait VpcmpgtwEmitter<A, B, C> {
2361    fn vpcmpgtw(&mut self, op0: A, op1: B, op2: C);
2362}
2363
2364impl<'a> VpcmpgtwEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2365    fn vpcmpgtw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2366        self.emit(VPCMPGTW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2367    }
2368}
2369
2370impl<'a> VpcmpgtwEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2371    fn vpcmpgtw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2372        self.emit(VPCMPGTW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2373    }
2374}
2375
2376impl<'a> VpcmpgtwEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2377    fn vpcmpgtw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2378        self.emit(VPCMPGTW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2379    }
2380}
2381
2382impl<'a> VpcmpgtwEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2383    fn vpcmpgtw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2384        self.emit(VPCMPGTW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2385    }
2386}
2387
2388impl<'a> VpcmpgtwEmitter<KReg, Xmm, Xmm> for Assembler<'a> {
2389    fn vpcmpgtw(&mut self, op0: KReg, op1: Xmm, op2: Xmm) {
2390        self.emit(VPCMPGTW128KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2391    }
2392}
2393
2394impl<'a> VpcmpgtwEmitter<KReg, Xmm, Mem> for Assembler<'a> {
2395    fn vpcmpgtw(&mut self, op0: KReg, op1: Xmm, op2: Mem) {
2396        self.emit(VPCMPGTW128KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2397    }
2398}
2399
2400impl<'a> VpcmpgtwEmitter<KReg, Ymm, Ymm> for Assembler<'a> {
2401    fn vpcmpgtw(&mut self, op0: KReg, op1: Ymm, op2: Ymm) {
2402        self.emit(VPCMPGTW256KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2403    }
2404}
2405
2406impl<'a> VpcmpgtwEmitter<KReg, Ymm, Mem> for Assembler<'a> {
2407    fn vpcmpgtw(&mut self, op0: KReg, op1: Ymm, op2: Mem) {
2408        self.emit(VPCMPGTW256KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2409    }
2410}
2411
2412impl<'a> VpcmpgtwEmitter<KReg, Zmm, Zmm> for Assembler<'a> {
2413    fn vpcmpgtw(&mut self, op0: KReg, op1: Zmm, op2: Zmm) {
2414        self.emit(VPCMPGTW512KRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2415    }
2416}
2417
2418impl<'a> VpcmpgtwEmitter<KReg, Zmm, Mem> for Assembler<'a> {
2419    fn vpcmpgtw(&mut self, op0: KReg, op1: Zmm, op2: Mem) {
2420        self.emit(VPCMPGTW512KRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2421    }
2422}
2423
2424/// `VPCMPISTRI` (VPCMPISTRI). 
2425/// The instruction compares data from two strings based on the encoded value in the imm8 control byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates an index stored to ECX.
2426///
2427///
2428/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPISTRI.html).
2429///
2430/// Supported operand variants:
2431///
2432/// ```text
2433/// +---+---------------+
2434/// | # | Operands      |
2435/// +---+---------------+
2436/// | 1 | Xmm, Mem, Imm |
2437/// | 2 | Xmm, Xmm, Imm |
2438/// +---+---------------+
2439/// ```
2440pub trait VpcmpistriEmitter<A, B, C> {
2441    fn vpcmpistri(&mut self, op0: A, op1: B, op2: C);
2442}
2443
2444impl<'a> VpcmpistriEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2445    fn vpcmpistri(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2446        self.emit(VPCMPISTRIRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2447    }
2448}
2449
2450impl<'a> VpcmpistriEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2451    fn vpcmpistri(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2452        self.emit(VPCMPISTRIRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2453    }
2454}
2455
2456/// `VPCMPISTRM` (VPCMPISTRM). 
2457/// The instruction compares data from two strings based on the encoded value in the imm8 byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”) generating a mask stored to XMM0.
2458///
2459///
2460/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPISTRM.html).
2461///
2462/// Supported operand variants:
2463///
2464/// ```text
2465/// +---+---------------+
2466/// | # | Operands      |
2467/// +---+---------------+
2468/// | 1 | Xmm, Mem, Imm |
2469/// | 2 | Xmm, Xmm, Imm |
2470/// +---+---------------+
2471/// ```
2472pub trait VpcmpistrmEmitter<A, B, C> {
2473    fn vpcmpistrm(&mut self, op0: A, op1: B, op2: C);
2474}
2475
2476impl<'a> VpcmpistrmEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2477    fn vpcmpistrm(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2478        self.emit(VPCMPISTRMRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2479    }
2480}
2481
2482impl<'a> VpcmpistrmEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2483    fn vpcmpistrm(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2484        self.emit(VPCMPISTRMRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2485    }
2486}
2487
2488/// `VPERM2F128` (VPERM2F128). 
2489/// Permute 128 bit floating-point-containing fields from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
2490///
2491///
2492/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2F128.html).
2493///
2494/// Supported operand variants:
2495///
2496/// ```text
2497/// +---+--------------------+
2498/// | # | Operands           |
2499/// +---+--------------------+
2500/// | 1 | Ymm, Ymm, Mem, Imm |
2501/// | 2 | Ymm, Ymm, Ymm, Imm |
2502/// +---+--------------------+
2503/// ```
2504pub trait Vperm2f128Emitter<A, B, C, D> {
2505    fn vperm2f128(&mut self, op0: A, op1: B, op2: C, op3: D);
2506}
2507
2508impl<'a> Vperm2f128Emitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
2509    fn vperm2f128(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
2510        self.emit(VPERM2F128_256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2511    }
2512}
2513
2514impl<'a> Vperm2f128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
2515    fn vperm2f128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
2516        self.emit(VPERM2F128_256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2517    }
2518}
2519
2520/// `VPEXTRD` (VPEXTRD). 
2521/// Extract a byte/dword/qword integer value from the source XMM register at a byte/dword/qword offset determined from imm8[3:0]. The destination can be a register or byte/dword/qword memory location. If the destination is a register, the upper bits of the register are zero extended.
2522///
2523///
2524/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRB%3APEXTRD%3APEXTRQ.html).
2525///
2526/// Supported operand variants:
2527///
2528/// ```text
2529/// +---+---------------+
2530/// | # | Operands      |
2531/// +---+---------------+
2532/// | 1 | Gpd, Xmm, Imm |
2533/// | 2 | Mem, Xmm, Imm |
2534/// +---+---------------+
2535/// ```
2536pub trait VpextrdEmitter<A, B, C> {
2537    fn vpextrd(&mut self, op0: A, op1: B, op2: C);
2538}
2539
2540impl<'a> VpextrdEmitter<Gpd, Xmm, Imm> for Assembler<'a> {
2541    fn vpextrd(&mut self, op0: Gpd, op1: Xmm, op2: Imm) {
2542        self.emit(VPEXTRDRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2543    }
2544}
2545
2546impl<'a> VpextrdEmitter<Mem, Xmm, Imm> for Assembler<'a> {
2547    fn vpextrd(&mut self, op0: Mem, op1: Xmm, op2: Imm) {
2548        self.emit(VPEXTRDMRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2549    }
2550}
2551
2552/// `VPEXTRQ` (VPEXTRQ). 
2553/// Extract a byte/dword/qword integer value from the source XMM register at a byte/dword/qword offset determined from imm8[3:0]. The destination can be a register or byte/dword/qword memory location. If the destination is a register, the upper bits of the register are zero extended.
2554///
2555///
2556/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRB%3APEXTRD%3APEXTRQ.html).
2557///
2558/// Supported operand variants:
2559///
2560/// ```text
2561/// +---+---------------+
2562/// | # | Operands      |
2563/// +---+---------------+
2564/// | 1 | Gpd, Xmm, Imm |
2565/// | 2 | Gpq, Xmm, Imm |
2566/// | 3 | Mem, Xmm, Imm |
2567/// +---+---------------+
2568/// ```
2569pub trait VpextrqEmitter<A, B, C> {
2570    fn vpextrq(&mut self, op0: A, op1: B, op2: C);
2571}
2572
2573impl<'a> VpextrqEmitter<Gpd, Xmm, Imm> for Assembler<'a> {
2574    fn vpextrq(&mut self, op0: Gpd, op1: Xmm, op2: Imm) {
2575        self.emit(VPEXTRQRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2576    }
2577}
2578
2579impl<'a> VpextrqEmitter<Mem, Xmm, Imm> for Assembler<'a> {
2580    fn vpextrq(&mut self, op0: Mem, op1: Xmm, op2: Imm) {
2581        self.emit(VPEXTRQMRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2582    }
2583}
2584
2585impl<'a> VpextrqEmitter<Gpq, Xmm, Imm> for Assembler<'a> {
2586    fn vpextrq(&mut self, op0: Gpq, op1: Xmm, op2: Imm) {
2587        self.emit(VPEXTRQRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2588    }
2589}
2590
2591/// `VPHADDD` (VPHADDD). 
2592/// (V)PHADDW adds two adjacent 16-bit signed integers horizontally from the source and destination operands and packs the 16-bit signed results to the destination operand (first operand). (V)PHADDD adds two adjacent 32-bit signed integers horizontally from the source and destination operands and packs the 32-bit signed results to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2593///
2594///
2595/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDW%3APHADDD.html).
2596///
2597/// Supported operand variants:
2598///
2599/// ```text
2600/// +---+---------------+
2601/// | # | Operands      |
2602/// +---+---------------+
2603/// | 1 | Xmm, Xmm, Mem |
2604/// | 2 | Xmm, Xmm, Xmm |
2605/// | 3 | Ymm, Ymm, Mem |
2606/// | 4 | Ymm, Ymm, Ymm |
2607/// +---+---------------+
2608/// ```
2609pub trait VphadddEmitter<A, B, C> {
2610    fn vphaddd(&mut self, op0: A, op1: B, op2: C);
2611}
2612
2613impl<'a> VphadddEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2614    fn vphaddd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2615        self.emit(VPHADDD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2616    }
2617}
2618
2619impl<'a> VphadddEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2620    fn vphaddd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2621        self.emit(VPHADDD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2622    }
2623}
2624
2625impl<'a> VphadddEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2626    fn vphaddd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2627        self.emit(VPHADDD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2628    }
2629}
2630
2631impl<'a> VphadddEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2632    fn vphaddd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2633        self.emit(VPHADDD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2634    }
2635}
2636
2637/// `VPHADDSW` (VPHADDSW). 
2638/// (V)PHADDSW adds two adjacent signed 16-bit integers horizontally from the source and destination operands and saturates the signed results; packs the signed, saturated 16-bit results to the destination operand (first operand) When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2639///
2640///
2641/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDSW.html).
2642///
2643/// Supported operand variants:
2644///
2645/// ```text
2646/// +---+---------------+
2647/// | # | Operands      |
2648/// +---+---------------+
2649/// | 1 | Xmm, Xmm, Mem |
2650/// | 2 | Xmm, Xmm, Xmm |
2651/// | 3 | Ymm, Ymm, Mem |
2652/// | 4 | Ymm, Ymm, Ymm |
2653/// +---+---------------+
2654/// ```
2655pub trait VphaddswEmitter<A, B, C> {
2656    fn vphaddsw(&mut self, op0: A, op1: B, op2: C);
2657}
2658
2659impl<'a> VphaddswEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2660    fn vphaddsw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2661        self.emit(VPHADDSW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2662    }
2663}
2664
2665impl<'a> VphaddswEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2666    fn vphaddsw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2667        self.emit(VPHADDSW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2668    }
2669}
2670
2671impl<'a> VphaddswEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2672    fn vphaddsw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2673        self.emit(VPHADDSW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2674    }
2675}
2676
2677impl<'a> VphaddswEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2678    fn vphaddsw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2679        self.emit(VPHADDSW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2680    }
2681}
2682
2683/// `VPHADDW` (VPHADDW). 
2684/// (V)PHADDW adds two adjacent 16-bit signed integers horizontally from the source and destination operands and packs the 16-bit signed results to the destination operand (first operand). (V)PHADDD adds two adjacent 32-bit signed integers horizontally from the source and destination operands and packs the 32-bit signed results to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2685///
2686///
2687/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDW%3APHADDD.html).
2688///
2689/// Supported operand variants:
2690///
2691/// ```text
2692/// +---+---------------+
2693/// | # | Operands      |
2694/// +---+---------------+
2695/// | 1 | Xmm, Xmm, Mem |
2696/// | 2 | Xmm, Xmm, Xmm |
2697/// | 3 | Ymm, Ymm, Mem |
2698/// | 4 | Ymm, Ymm, Ymm |
2699/// +---+---------------+
2700/// ```
2701pub trait VphaddwEmitter<A, B, C> {
2702    fn vphaddw(&mut self, op0: A, op1: B, op2: C);
2703}
2704
2705impl<'a> VphaddwEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2706    fn vphaddw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2707        self.emit(VPHADDW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2708    }
2709}
2710
2711impl<'a> VphaddwEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2712    fn vphaddw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2713        self.emit(VPHADDW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2714    }
2715}
2716
2717impl<'a> VphaddwEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2718    fn vphaddw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2719        self.emit(VPHADDW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2720    }
2721}
2722
2723impl<'a> VphaddwEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2724    fn vphaddw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2725        self.emit(VPHADDW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2726    }
2727}
2728
2729/// `VPHMINPOSUW` (VPHMINPOSUW). 
2730/// Determine the minimum unsigned word value in the source operand (second operand) and place the unsigned word in the low word (bits 0-15) of the destination operand (first operand). The word index of the minimum value is stored in bits 16-18 of the destination operand. The remaining upper bits of the destination are set to zero.
2731///
2732///
2733/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHMINPOSUW.html).
2734///
2735/// Supported operand variants:
2736///
2737/// ```text
2738/// +---+----------+
2739/// | # | Operands |
2740/// +---+----------+
2741/// | 1 | Xmm, Mem |
2742/// | 2 | Xmm, Xmm |
2743/// +---+----------+
2744/// ```
2745pub trait VphminposuwEmitter<A, B> {
2746    fn vphminposuw(&mut self, op0: A, op1: B);
2747}
2748
2749impl<'a> VphminposuwEmitter<Xmm, Xmm> for Assembler<'a> {
2750    fn vphminposuw(&mut self, op0: Xmm, op1: Xmm) {
2751        self.emit(VPHMINPOSUW128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2752    }
2753}
2754
2755impl<'a> VphminposuwEmitter<Xmm, Mem> for Assembler<'a> {
2756    fn vphminposuw(&mut self, op0: Xmm, op1: Mem) {
2757        self.emit(VPHMINPOSUW128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2758    }
2759}
2760
2761/// `VPHSUBD` (VPHSUBD). 
2762/// (V)PHSUBW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands, and packs the signed 16-bit results to the destination operand (first operand). (V)PHSUBD performs horizontal subtraction on each adjacent pair of 32-bit signed integers by subtracting the most significant doubleword from the least significant doubleword of each pair, and packs the signed 32-bit result to the destination operand. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2763///
2764///
2765/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBW%3APHSUBD.html).
2766///
2767/// Supported operand variants:
2768///
2769/// ```text
2770/// +---+---------------+
2771/// | # | Operands      |
2772/// +---+---------------+
2773/// | 1 | Xmm, Xmm, Mem |
2774/// | 2 | Xmm, Xmm, Xmm |
2775/// | 3 | Ymm, Ymm, Mem |
2776/// | 4 | Ymm, Ymm, Ymm |
2777/// +---+---------------+
2778/// ```
2779pub trait VphsubdEmitter<A, B, C> {
2780    fn vphsubd(&mut self, op0: A, op1: B, op2: C);
2781}
2782
2783impl<'a> VphsubdEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2784    fn vphsubd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2785        self.emit(VPHSUBD128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2786    }
2787}
2788
2789impl<'a> VphsubdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2790    fn vphsubd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2791        self.emit(VPHSUBD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2792    }
2793}
2794
2795impl<'a> VphsubdEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2796    fn vphsubd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2797        self.emit(VPHSUBD256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2798    }
2799}
2800
2801impl<'a> VphsubdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2802    fn vphsubd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2803        self.emit(VPHSUBD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2804    }
2805}
2806
2807/// `VPHSUBSW` (VPHSUBSW). 
2808/// (V)PHSUBSW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands. The signed, saturated 16-bit results are packed to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2809///
2810///
2811/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBSW.html).
2812///
2813/// Supported operand variants:
2814///
2815/// ```text
2816/// +---+---------------+
2817/// | # | Operands      |
2818/// +---+---------------+
2819/// | 1 | Xmm, Xmm, Mem |
2820/// | 2 | Xmm, Xmm, Xmm |
2821/// | 3 | Ymm, Ymm, Mem |
2822/// | 4 | Ymm, Ymm, Ymm |
2823/// +---+---------------+
2824/// ```
2825pub trait VphsubswEmitter<A, B, C> {
2826    fn vphsubsw(&mut self, op0: A, op1: B, op2: C);
2827}
2828
2829impl<'a> VphsubswEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2830    fn vphsubsw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2831        self.emit(VPHSUBSW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2832    }
2833}
2834
2835impl<'a> VphsubswEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2836    fn vphsubsw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2837        self.emit(VPHSUBSW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2838    }
2839}
2840
2841impl<'a> VphsubswEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2842    fn vphsubsw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2843        self.emit(VPHSUBSW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2844    }
2845}
2846
2847impl<'a> VphsubswEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2848    fn vphsubsw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2849        self.emit(VPHSUBSW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2850    }
2851}
2852
2853/// `VPHSUBW` (VPHSUBW). 
2854/// (V)PHSUBW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands, and packs the signed 16-bit results to the destination operand (first operand). (V)PHSUBD performs horizontal subtraction on each adjacent pair of 32-bit signed integers by subtracting the most significant doubleword from the least significant doubleword of each pair, and packs the signed 32-bit result to the destination operand. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
2855///
2856///
2857/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBW%3APHSUBD.html).
2858///
2859/// Supported operand variants:
2860///
2861/// ```text
2862/// +---+---------------+
2863/// | # | Operands      |
2864/// +---+---------------+
2865/// | 1 | Xmm, Xmm, Mem |
2866/// | 2 | Xmm, Xmm, Xmm |
2867/// | 3 | Ymm, Ymm, Mem |
2868/// | 4 | Ymm, Ymm, Ymm |
2869/// +---+---------------+
2870/// ```
2871pub trait VphsubwEmitter<A, B, C> {
2872    fn vphsubw(&mut self, op0: A, op1: B, op2: C);
2873}
2874
2875impl<'a> VphsubwEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
2876    fn vphsubw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
2877        self.emit(VPHSUBW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2878    }
2879}
2880
2881impl<'a> VphsubwEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
2882    fn vphsubw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
2883        self.emit(VPHSUBW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2884    }
2885}
2886
2887impl<'a> VphsubwEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
2888    fn vphsubw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
2889        self.emit(VPHSUBW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2890    }
2891}
2892
2893impl<'a> VphsubwEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
2894    fn vphsubw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
2895        self.emit(VPHSUBW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2896    }
2897}
2898
2899/// `VPINSRD` (VPINSRD). 
2900/// Copies a byte/dword/qword from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other elements in the destination register are left untouched.) The source operand can be a general-purpose register or a memory location. (When the source operand is a general-purpose register, PINSRB copies the low byte of the register.) The destination operand is an XMM register. The count operand is an 8-bit immediate. When specifying a qword[dword, byte] location in an XMM register, the [2, 4] least-significant bit(s) of the count operand specify the location.
2901///
2902///
2903/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRB%3APINSRD%3APINSRQ.html).
2904///
2905/// Supported operand variants:
2906///
2907/// ```text
2908/// +---+--------------------+
2909/// | # | Operands           |
2910/// +---+--------------------+
2911/// | 1 | Xmm, Xmm, Gpd, Imm |
2912/// | 2 | Xmm, Xmm, Mem, Imm |
2913/// +---+--------------------+
2914/// ```
2915pub trait VpinsrdEmitter<A, B, C, D> {
2916    fn vpinsrd(&mut self, op0: A, op1: B, op2: C, op3: D);
2917}
2918
2919impl<'a> VpinsrdEmitter<Xmm, Xmm, Gpd, Imm> for Assembler<'a> {
2920    fn vpinsrd(&mut self, op0: Xmm, op1: Xmm, op2: Gpd, op3: Imm) {
2921        self.emit(VPINSRDRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2922    }
2923}
2924
2925impl<'a> VpinsrdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
2926    fn vpinsrd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
2927        self.emit(VPINSRDRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2928    }
2929}
2930
2931/// `VPINSRQ` (VPINSRQ). 
2932/// Copies a byte/dword/qword from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other elements in the destination register are left untouched.) The source operand can be a general-purpose register or a memory location. (When the source operand is a general-purpose register, PINSRB copies the low byte of the register.) The destination operand is an XMM register. The count operand is an 8-bit immediate. When specifying a qword[dword, byte] location in an XMM register, the [2, 4] least-significant bit(s) of the count operand specify the location.
2933///
2934///
2935/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRB%3APINSRD%3APINSRQ.html).
2936///
2937/// Supported operand variants:
2938///
2939/// ```text
2940/// +---+--------------------+
2941/// | # | Operands           |
2942/// +---+--------------------+
2943/// | 1 | Xmm, Xmm, Gpd, Imm |
2944/// | 2 | Xmm, Xmm, Gpq, Imm |
2945/// | 3 | Xmm, Xmm, Mem, Imm |
2946/// +---+--------------------+
2947/// ```
2948pub trait VpinsrqEmitter<A, B, C, D> {
2949    fn vpinsrq(&mut self, op0: A, op1: B, op2: C, op3: D);
2950}
2951
2952impl<'a> VpinsrqEmitter<Xmm, Xmm, Gpd, Imm> for Assembler<'a> {
2953    fn vpinsrq(&mut self, op0: Xmm, op1: Xmm, op2: Gpd, op3: Imm) {
2954        self.emit(VPINSRQRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2955    }
2956}
2957
2958impl<'a> VpinsrqEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
2959    fn vpinsrq(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
2960        self.emit(VPINSRQRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2961    }
2962}
2963
2964impl<'a> VpinsrqEmitter<Xmm, Xmm, Gpq, Imm> for Assembler<'a> {
2965    fn vpinsrq(&mut self, op0: Xmm, op1: Xmm, op2: Gpq, op3: Imm) {
2966        self.emit(VPINSRQRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
2967    }
2968}
2969
2970/// `VPMOVMSKB` (VPMOVMSKB). 
2971/// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
2972///
2973///
2974/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
2975///
2976/// Supported operand variants:
2977///
2978/// ```text
2979/// +---+----------+
2980/// | # | Operands |
2981/// +---+----------+
2982/// | 1 | Gpd, Xmm |
2983/// | 2 | Gpd, Ymm |
2984/// +---+----------+
2985/// ```
2986pub trait VpmovmskbEmitter<A, B> {
2987    fn vpmovmskb(&mut self, op0: A, op1: B);
2988}
2989
2990impl<'a> VpmovmskbEmitter<Gpd, Xmm> for Assembler<'a> {
2991    fn vpmovmskb(&mut self, op0: Gpd, op1: Xmm) {
2992        self.emit(VPMOVMSKB128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2993    }
2994}
2995
2996impl<'a> VpmovmskbEmitter<Gpd, Ymm> for Assembler<'a> {
2997    fn vpmovmskb(&mut self, op0: Gpd, op1: Ymm) {
2998        self.emit(VPMOVMSKB256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2999    }
3000}
3001
3002/// `VPOR` (VPOR). 
3003/// Performs a bitwise logical OR operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is set to 1 if either or both of the corresponding bits of the first and second operands are 1; otherwise, it is set to 0.
3004///
3005///
3006/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/POR.html).
3007///
3008/// Supported operand variants:
3009///
3010/// ```text
3011/// +---+---------------+
3012/// | # | Operands      |
3013/// +---+---------------+
3014/// | 1 | Xmm, Xmm, Mem |
3015/// | 2 | Xmm, Xmm, Xmm |
3016/// | 3 | Ymm, Ymm, Mem |
3017/// | 4 | Ymm, Ymm, Ymm |
3018/// +---+---------------+
3019/// ```
3020pub trait VporEmitter<A, B, C> {
3021    fn vpor(&mut self, op0: A, op1: B, op2: C);
3022}
3023
3024impl<'a> VporEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3025    fn vpor(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3026        self.emit(VPOR128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3027    }
3028}
3029
3030impl<'a> VporEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3031    fn vpor(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3032        self.emit(VPOR128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3033    }
3034}
3035
3036impl<'a> VporEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
3037    fn vpor(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
3038        self.emit(VPOR256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3039    }
3040}
3041
3042impl<'a> VporEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
3043    fn vpor(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
3044        self.emit(VPOR256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3045    }
3046}
3047
3048/// `VPSIGNB` (VPSIGNB). 
3049/// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
3050///
3051///
3052/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
3053///
3054/// Supported operand variants:
3055///
3056/// ```text
3057/// +---+---------------+
3058/// | # | Operands      |
3059/// +---+---------------+
3060/// | 1 | Xmm, Xmm, Mem |
3061/// | 2 | Xmm, Xmm, Xmm |
3062/// | 3 | Ymm, Ymm, Mem |
3063/// | 4 | Ymm, Ymm, Ymm |
3064/// +---+---------------+
3065/// ```
3066pub trait VpsignbEmitter<A, B, C> {
3067    fn vpsignb(&mut self, op0: A, op1: B, op2: C);
3068}
3069
3070impl<'a> VpsignbEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3071    fn vpsignb(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3072        self.emit(VPSIGNB128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3073    }
3074}
3075
3076impl<'a> VpsignbEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3077    fn vpsignb(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3078        self.emit(VPSIGNB128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3079    }
3080}
3081
3082impl<'a> VpsignbEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
3083    fn vpsignb(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
3084        self.emit(VPSIGNB256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3085    }
3086}
3087
3088impl<'a> VpsignbEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
3089    fn vpsignb(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
3090        self.emit(VPSIGNB256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3091    }
3092}
3093
3094/// `VPSIGND` (VPSIGND). 
3095/// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
3096///
3097///
3098/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
3099///
3100/// Supported operand variants:
3101///
3102/// ```text
3103/// +---+---------------+
3104/// | # | Operands      |
3105/// +---+---------------+
3106/// | 1 | Xmm, Xmm, Mem |
3107/// | 2 | Xmm, Xmm, Xmm |
3108/// | 3 | Ymm, Ymm, Mem |
3109/// | 4 | Ymm, Ymm, Ymm |
3110/// +---+---------------+
3111/// ```
3112pub trait VpsigndEmitter<A, B, C> {
3113    fn vpsignd(&mut self, op0: A, op1: B, op2: C);
3114}
3115
3116impl<'a> VpsigndEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3117    fn vpsignd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3118        self.emit(VPSIGND128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3119    }
3120}
3121
3122impl<'a> VpsigndEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3123    fn vpsignd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3124        self.emit(VPSIGND128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3125    }
3126}
3127
3128impl<'a> VpsigndEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
3129    fn vpsignd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
3130        self.emit(VPSIGND256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3131    }
3132}
3133
3134impl<'a> VpsigndEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
3135    fn vpsignd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
3136        self.emit(VPSIGND256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3137    }
3138}
3139
3140/// `VPSIGNW` (VPSIGNW). 
3141/// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
3142///
3143///
3144/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
3145///
3146/// Supported operand variants:
3147///
3148/// ```text
3149/// +---+---------------+
3150/// | # | Operands      |
3151/// +---+---------------+
3152/// | 1 | Xmm, Xmm, Mem |
3153/// | 2 | Xmm, Xmm, Xmm |
3154/// | 3 | Ymm, Ymm, Mem |
3155/// | 4 | Ymm, Ymm, Ymm |
3156/// +---+---------------+
3157/// ```
3158pub trait VpsignwEmitter<A, B, C> {
3159    fn vpsignw(&mut self, op0: A, op1: B, op2: C);
3160}
3161
3162impl<'a> VpsignwEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3163    fn vpsignw(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3164        self.emit(VPSIGNW128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3165    }
3166}
3167
3168impl<'a> VpsignwEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3169    fn vpsignw(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3170        self.emit(VPSIGNW128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3171    }
3172}
3173
3174impl<'a> VpsignwEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
3175    fn vpsignw(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
3176        self.emit(VPSIGNW256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3177    }
3178}
3179
3180impl<'a> VpsignwEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
3181    fn vpsignw(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
3182        self.emit(VPSIGNW256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3183    }
3184}
3185
3186/// `VPTEST` (VPTEST). 
3187/// PTEST and VPTEST set the ZF flag if all bits in the result are 0 of the bitwise AND of the first source operand (first operand) and the second source operand (second operand). VPTEST sets the CF flag if all bits in the result are 0 of the bitwise AND of the second source operand (second operand) and the logical NOT of the destination operand.
3188///
3189///
3190/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PTEST.html).
3191///
3192/// Supported operand variants:
3193///
3194/// ```text
3195/// +---+----------+
3196/// | # | Operands |
3197/// +---+----------+
3198/// | 1 | Xmm, Mem |
3199/// | 2 | Xmm, Xmm |
3200/// | 3 | Ymm, Mem |
3201/// | 4 | Ymm, Ymm |
3202/// +---+----------+
3203/// ```
3204pub trait VptestEmitter<A, B> {
3205    fn vptest(&mut self, op0: A, op1: B);
3206}
3207
3208impl<'a> VptestEmitter<Xmm, Xmm> for Assembler<'a> {
3209    fn vptest(&mut self, op0: Xmm, op1: Xmm) {
3210        self.emit(VPTEST128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3211    }
3212}
3213
3214impl<'a> VptestEmitter<Xmm, Mem> for Assembler<'a> {
3215    fn vptest(&mut self, op0: Xmm, op1: Mem) {
3216        self.emit(VPTEST128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3217    }
3218}
3219
3220impl<'a> VptestEmitter<Ymm, Ymm> for Assembler<'a> {
3221    fn vptest(&mut self, op0: Ymm, op1: Ymm) {
3222        self.emit(VPTEST256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3223    }
3224}
3225
3226impl<'a> VptestEmitter<Ymm, Mem> for Assembler<'a> {
3227    fn vptest(&mut self, op0: Ymm, op1: Mem) {
3228        self.emit(VPTEST256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3229    }
3230}
3231
3232/// `VPXOR` (VPXOR). 
3233/// Performs a bitwise logical exclusive-OR (XOR) operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is 1 if the corresponding bits of the two operands are different; each bit is 0 if the corresponding bits of the operands are the same.
3234///
3235///
3236/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PXOR.html).
3237///
3238/// Supported operand variants:
3239///
3240/// ```text
3241/// +---+---------------+
3242/// | # | Operands      |
3243/// +---+---------------+
3244/// | 1 | Xmm, Xmm, Mem |
3245/// | 2 | Xmm, Xmm, Xmm |
3246/// | 3 | Ymm, Ymm, Mem |
3247/// | 4 | Ymm, Ymm, Ymm |
3248/// +---+---------------+
3249/// ```
3250pub trait VpxorEmitter<A, B, C> {
3251    fn vpxor(&mut self, op0: A, op1: B, op2: C);
3252}
3253
3254impl<'a> VpxorEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3255    fn vpxor(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3256        self.emit(VPXOR128RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3257    }
3258}
3259
3260impl<'a> VpxorEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3261    fn vpxor(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3262        self.emit(VPXOR128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3263    }
3264}
3265
3266impl<'a> VpxorEmitter<Ymm, Ymm, Ymm> for Assembler<'a> {
3267    fn vpxor(&mut self, op0: Ymm, op1: Ymm, op2: Ymm) {
3268        self.emit(VPXOR256RRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3269    }
3270}
3271
3272impl<'a> VpxorEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
3273    fn vpxor(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
3274        self.emit(VPXOR256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3275    }
3276}
3277
3278/// `VRCPPS` (VRCPPS). 
3279/// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3280///
3281///
3282/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
3283///
3284/// Supported operand variants:
3285///
3286/// ```text
3287/// +---+----------+
3288/// | # | Operands |
3289/// +---+----------+
3290/// | 1 | Xmm, Mem |
3291/// | 2 | Xmm, Xmm |
3292/// | 3 | Ymm, Mem |
3293/// | 4 | Ymm, Ymm |
3294/// +---+----------+
3295/// ```
3296pub trait VrcppsEmitter<A, B> {
3297    fn vrcpps(&mut self, op0: A, op1: B);
3298}
3299
3300impl<'a> VrcppsEmitter<Xmm, Xmm> for Assembler<'a> {
3301    fn vrcpps(&mut self, op0: Xmm, op1: Xmm) {
3302        self.emit(VRCPPS128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3303    }
3304}
3305
3306impl<'a> VrcppsEmitter<Xmm, Mem> for Assembler<'a> {
3307    fn vrcpps(&mut self, op0: Xmm, op1: Mem) {
3308        self.emit(VRCPPS128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3309    }
3310}
3311
3312impl<'a> VrcppsEmitter<Ymm, Ymm> for Assembler<'a> {
3313    fn vrcpps(&mut self, op0: Ymm, op1: Ymm) {
3314        self.emit(VRCPPS256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3315    }
3316}
3317
3318impl<'a> VrcppsEmitter<Ymm, Mem> for Assembler<'a> {
3319    fn vrcpps(&mut self, op0: Ymm, op1: Mem) {
3320        self.emit(VRCPPS256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3321    }
3322}
3323
3324/// `VRCPSS` (VRCPSS). 
3325/// Computes of an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3326///
3327///
3328/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
3329///
3330/// Supported operand variants:
3331///
3332/// ```text
3333/// +---+---------------+
3334/// | # | Operands      |
3335/// +---+---------------+
3336/// | 1 | Xmm, Xmm, Mem |
3337/// | 2 | Xmm, Xmm, Xmm |
3338/// +---+---------------+
3339/// ```
3340pub trait VrcpssEmitter<A, B, C> {
3341    fn vrcpss(&mut self, op0: A, op1: B, op2: C);
3342}
3343
3344impl<'a> VrcpssEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3345    fn vrcpss(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3346        self.emit(VRCPSSRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3347    }
3348}
3349
3350impl<'a> VrcpssEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3351    fn vrcpss(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3352        self.emit(VRCPSSRRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3353    }
3354}
3355
3356/// `VROUNDPD` (VROUNDPD). 
3357/// Round the 2 double precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double precision floating-point value.
3358///
3359///
3360/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDPD.html).
3361///
3362/// Supported operand variants:
3363///
3364/// ```text
3365/// +---+---------------+
3366/// | # | Operands      |
3367/// +---+---------------+
3368/// | 1 | Xmm, Mem, Imm |
3369/// | 2 | Xmm, Xmm, Imm |
3370/// | 3 | Ymm, Mem, Imm |
3371/// | 4 | Ymm, Ymm, Imm |
3372/// +---+---------------+
3373/// ```
3374pub trait VroundpdEmitter<A, B, C> {
3375    fn vroundpd(&mut self, op0: A, op1: B, op2: C);
3376}
3377
3378impl<'a> VroundpdEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
3379    fn vroundpd(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
3380        self.emit(VROUNDPD128RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3381    }
3382}
3383
3384impl<'a> VroundpdEmitter<Xmm, Mem, Imm> for Assembler<'a> {
3385    fn vroundpd(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
3386        self.emit(VROUNDPD128RMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3387    }
3388}
3389
3390impl<'a> VroundpdEmitter<Ymm, Ymm, Imm> for Assembler<'a> {
3391    fn vroundpd(&mut self, op0: Ymm, op1: Ymm, op2: Imm) {
3392        self.emit(VROUNDPD256RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3393    }
3394}
3395
3396impl<'a> VroundpdEmitter<Ymm, Mem, Imm> for Assembler<'a> {
3397    fn vroundpd(&mut self, op0: Ymm, op1: Mem, op2: Imm) {
3398        self.emit(VROUNDPD256RMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3399    }
3400}
3401
3402/// `VROUNDPS` (VROUNDPS). 
3403/// Round the 4 single precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single precision floating-point value.
3404///
3405///
3406/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDPS.html).
3407///
3408/// Supported operand variants:
3409///
3410/// ```text
3411/// +---+---------------+
3412/// | # | Operands      |
3413/// +---+---------------+
3414/// | 1 | Xmm, Mem, Imm |
3415/// | 2 | Xmm, Xmm, Imm |
3416/// | 3 | Ymm, Mem, Imm |
3417/// | 4 | Ymm, Ymm, Imm |
3418/// +---+---------------+
3419/// ```
3420pub trait VroundpsEmitter<A, B, C> {
3421    fn vroundps(&mut self, op0: A, op1: B, op2: C);
3422}
3423
3424impl<'a> VroundpsEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
3425    fn vroundps(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
3426        self.emit(VROUNDPS128RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3427    }
3428}
3429
3430impl<'a> VroundpsEmitter<Xmm, Mem, Imm> for Assembler<'a> {
3431    fn vroundps(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
3432        self.emit(VROUNDPS128RMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3433    }
3434}
3435
3436impl<'a> VroundpsEmitter<Ymm, Ymm, Imm> for Assembler<'a> {
3437    fn vroundps(&mut self, op0: Ymm, op1: Ymm, op2: Imm) {
3438        self.emit(VROUNDPS256RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3439    }
3440}
3441
3442impl<'a> VroundpsEmitter<Ymm, Mem, Imm> for Assembler<'a> {
3443    fn vroundps(&mut self, op0: Ymm, op1: Mem, op2: Imm) {
3444        self.emit(VROUNDPS256RMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3445    }
3446}
3447
3448/// `VROUNDSD` (VROUNDSD). 
3449/// Round the double precision floating-point value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.
3450///
3451///
3452/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDSD.html).
3453///
3454/// Supported operand variants:
3455///
3456/// ```text
3457/// +---+--------------------+
3458/// | # | Operands           |
3459/// +---+--------------------+
3460/// | 1 | Xmm, Xmm, Mem, Imm |
3461/// | 2 | Xmm, Xmm, Xmm, Imm |
3462/// +---+--------------------+
3463/// ```
3464pub trait VroundsdEmitter<A, B, C, D> {
3465    fn vroundsd(&mut self, op0: A, op1: B, op2: C, op3: D);
3466}
3467
3468impl<'a> VroundsdEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
3469    fn vroundsd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
3470        self.emit(VROUNDSDRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
3471    }
3472}
3473
3474impl<'a> VroundsdEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
3475    fn vroundsd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
3476        self.emit(VROUNDSDRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
3477    }
3478}
3479
3480/// `VROUNDSS` (VROUNDSS). 
3481/// Round the single precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single precision floating-point input to an integer value and returns the result as a single precision floating-point value in the lowest position. The upper three single precision floating-point values in the destination are retained.
3482///
3483///
3484/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDSS.html).
3485///
3486/// Supported operand variants:
3487///
3488/// ```text
3489/// +---+--------------------+
3490/// | # | Operands           |
3491/// +---+--------------------+
3492/// | 1 | Xmm, Xmm, Mem, Imm |
3493/// | 2 | Xmm, Xmm, Xmm, Imm |
3494/// +---+--------------------+
3495/// ```
3496pub trait VroundssEmitter<A, B, C, D> {
3497    fn vroundss(&mut self, op0: A, op1: B, op2: C, op3: D);
3498}
3499
3500impl<'a> VroundssEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
3501    fn vroundss(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
3502        self.emit(VROUNDSSRRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
3503    }
3504}
3505
3506impl<'a> VroundssEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
3507    fn vroundss(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
3508        self.emit(VROUNDSSRRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
3509    }
3510}
3511
3512/// `VRSQRTPS` (VRSQRTPS). 
3513/// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3514///
3515///
3516/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
3517///
3518/// Supported operand variants:
3519///
3520/// ```text
3521/// +---+----------+
3522/// | # | Operands |
3523/// +---+----------+
3524/// | 1 | Xmm, Mem |
3525/// | 2 | Xmm, Xmm |
3526/// | 3 | Ymm, Mem |
3527/// | 4 | Ymm, Ymm |
3528/// +---+----------+
3529/// ```
3530pub trait VrsqrtpsEmitter<A, B> {
3531    fn vrsqrtps(&mut self, op0: A, op1: B);
3532}
3533
3534impl<'a> VrsqrtpsEmitter<Xmm, Xmm> for Assembler<'a> {
3535    fn vrsqrtps(&mut self, op0: Xmm, op1: Xmm) {
3536        self.emit(VRSQRTPS128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3537    }
3538}
3539
3540impl<'a> VrsqrtpsEmitter<Xmm, Mem> for Assembler<'a> {
3541    fn vrsqrtps(&mut self, op0: Xmm, op1: Mem) {
3542        self.emit(VRSQRTPS128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3543    }
3544}
3545
3546impl<'a> VrsqrtpsEmitter<Ymm, Ymm> for Assembler<'a> {
3547    fn vrsqrtps(&mut self, op0: Ymm, op1: Ymm) {
3548        self.emit(VRSQRTPS256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3549    }
3550}
3551
3552impl<'a> VrsqrtpsEmitter<Ymm, Mem> for Assembler<'a> {
3553    fn vrsqrtps(&mut self, op0: Ymm, op1: Mem) {
3554        self.emit(VRSQRTPS256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3555    }
3556}
3557
3558/// `VRSQRTSS` (VRSQRTSS). 
3559/// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3560///
3561///
3562/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
3563///
3564/// Supported operand variants:
3565///
3566/// ```text
3567/// +---+---------------+
3568/// | # | Operands      |
3569/// +---+---------------+
3570/// | 1 | Xmm, Xmm, Mem |
3571/// | 2 | Xmm, Xmm, Xmm |
3572/// +---+---------------+
3573/// ```
3574pub trait VrsqrtssEmitter<A, B, C> {
3575    fn vrsqrtss(&mut self, op0: A, op1: B, op2: C);
3576}
3577
3578impl<'a> VrsqrtssEmitter<Xmm, Xmm, Xmm> for Assembler<'a> {
3579    fn vrsqrtss(&mut self, op0: Xmm, op1: Xmm, op2: Xmm) {
3580        self.emit(VRSQRTSSRRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3581    }
3582}
3583
3584impl<'a> VrsqrtssEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
3585    fn vrsqrtss(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
3586        self.emit(VRSQRTSSRRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3587    }
3588}
3589
3590/// `VSTMXCSR` (VSTMXCSR). 
3591/// Stores the contents of the MXCSR control and status register to the destination operand. The destination operand is a 32-bit memory location. The reserved bits in the MXCSR register are stored as 0s.
3592///
3593///
3594/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
3595///
3596/// Supported operand variants:
3597///
3598/// ```text
3599/// +---+----------+
3600/// | # | Operands |
3601/// +---+----------+
3602/// | 1 | Mem      |
3603/// +---+----------+
3604/// ```
3605pub trait VstmxcsrEmitter<A> {
3606    fn vstmxcsr(&mut self, op0: A);
3607}
3608
3609impl<'a> VstmxcsrEmitter<Mem> for Assembler<'a> {
3610    fn vstmxcsr(&mut self, op0: Mem) {
3611        self.emit(VSTMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
3612    }
3613}
3614
/// `VTESTPD` (VTESTPD). 
/// VTESTPD performs a bitwise comparison of all the sign bits of the packed double precision elements in the first source operand and corresponding sign bits in the second source operand. If the AND of the source sign bits with the dest sign bits produces all zeros, the ZF is set else the ZF is clear. If the AND of the source sign bits with the inverted dest sign bits produces all zeros the CF is set else the CF is clear. An attempt to execute VTESTPD with VEX.W=1 will cause #UD.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VTESTPD%3AVTESTPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// | 3 | Ymm, Mem |
/// | 4 | Ymm, Ymm |
/// +---+----------+
/// ```
pub trait VtestpdEmitter<A, B> {
    fn vtestpd(&mut self, op0: A, op1: B);
}

// 128-bit register form: `VTESTPD xmm1, xmm2`. Writes only ZF/CF.
impl<'a> VtestpdEmitter<Xmm, Xmm> for Assembler<'a> {
    fn vtestpd(&mut self, op0: Xmm, op1: Xmm) {
        self.emit(VTESTPD128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
    }
}

// 128-bit memory form: `VTESTPD xmm1, m128`.
impl<'a> VtestpdEmitter<Xmm, Mem> for Assembler<'a> {
    fn vtestpd(&mut self, op0: Xmm, op1: Mem) {
        self.emit(VTESTPD128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
    }
}

// 256-bit register form: `VTESTPD ymm1, ymm2`.
impl<'a> VtestpdEmitter<Ymm, Ymm> for Assembler<'a> {
    fn vtestpd(&mut self, op0: Ymm, op1: Ymm) {
        self.emit(VTESTPD256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
    }
}

// 256-bit memory form: `VTESTPD ymm1, m256`.
impl<'a> VtestpdEmitter<Ymm, Mem> for Assembler<'a> {
    fn vtestpd(&mut self, op0: Ymm, op1: Mem) {
        self.emit(VTESTPD256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
    }
}
3660
3661/// `VTESTPS` (VTESTPS). 
3662/// VTESTPS performs a bitwise comparison of all the sign bits of the packed single-precision elements in the first source operation and corresponding sign bits in the second source operand. If the AND of the source sign bits with the dest sign bits produces all zeros, the ZF is set else the ZF is clear. If the AND of the source sign bits with the inverted dest sign bits produces all zeros the CF is set else the CF is clear. An attempt to execute VTESTPS with VEX.W=1 will cause #UD.
3663///
3664///
3665/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VTESTPD%3AVTESTPS.html).
3666///
3667/// Supported operand variants:
3668///
3669/// ```text
3670/// +---+----------+
3671/// | # | Operands |
3672/// +---+----------+
3673/// | 1 | Xmm, Mem |
3674/// | 2 | Xmm, Xmm |
3675/// | 3 | Ymm, Mem |
3676/// | 4 | Ymm, Ymm |
3677/// +---+----------+
3678/// ```
3679pub trait VtestpsEmitter<A, B> {
3680    fn vtestps(&mut self, op0: A, op1: B);
3681}
3682
3683impl<'a> VtestpsEmitter<Xmm, Xmm> for Assembler<'a> {
3684    fn vtestps(&mut self, op0: Xmm, op1: Xmm) {
3685        self.emit(VTESTPS128RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3686    }
3687}
3688
3689impl<'a> VtestpsEmitter<Xmm, Mem> for Assembler<'a> {
3690    fn vtestps(&mut self, op0: Xmm, op1: Mem) {
3691        self.emit(VTESTPS128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3692    }
3693}
3694
3695impl<'a> VtestpsEmitter<Ymm, Ymm> for Assembler<'a> {
3696    fn vtestps(&mut self, op0: Ymm, op1: Ymm) {
3697        self.emit(VTESTPS256RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3698    }
3699}
3700
3701impl<'a> VtestpsEmitter<Ymm, Mem> for Assembler<'a> {
3702    fn vtestps(&mut self, op0: Ymm, op1: Mem) {
3703        self.emit(VTESTPS256RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3704    }
3705}
3706
3707/// `VZEROALL` (VZEROALL). 
3708/// In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.
3709///
3710///
3711/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VZEROALL.html).
3712///
3713/// Supported operand variants:
3714///
3715/// ```text
3716/// +---+----------+
3717/// | # | Operands |
3718/// +---+----------+
3719/// | 1 | (none)   |
3720/// +---+----------+
3721/// ```
pub trait VzeroallEmitter {
    /// Emits a `VZEROALL` instruction (takes no operands).
    fn vzeroall(&mut self);
}
3725
3726impl<'a> VzeroallEmitter for Assembler<'a> {
3727    fn vzeroall(&mut self) {
3728        self.emit(VZEROALL, &NOREG, &NOREG, &NOREG, &NOREG);
3729    }
3730}
3731
3732/// `VZEROUPPER` (VZEROUPPER). 
3733/// In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.
3734///
3735///
3736/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VZEROUPPER.html).
3737///
3738/// Supported operand variants:
3739///
3740/// ```text
3741/// +---+----------+
3742/// | # | Operands |
3743/// +---+----------+
3744/// | 1 | (none)   |
3745/// +---+----------+
3746/// ```
pub trait VzeroupperEmitter {
    /// Emits a `VZEROUPPER` instruction (takes no operands).
    fn vzeroupper(&mut self);
}
3750
3751impl<'a> VzeroupperEmitter for Assembler<'a> {
3752    fn vzeroupper(&mut self) {
3753        self.emit(VZEROUPPER, &NOREG, &NOREG, &NOREG, &NOREG);
3754    }
3755}
3756
3757
3758impl<'a> Assembler<'a> {
3759    /// `VADDSUBPD` (VADDSUBPD). 
3760    /// Adds odd-numbered double precision floating-point values of the first source operand (second operand) with the corresponding double precision floating-point values from the second source operand (third operand); stores the result in the odd-numbered values of the destination operand (first operand). Subtracts the even-numbered double precision floating-point values from the second source operand from the corresponding double precision floating values in the first source operand; stores the result into the even-numbered values of the destination operand.
3761    ///
3762    ///
3763    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSUBPD.html).
3764    ///
3765    /// Supported operand variants:
3766    ///
3767    /// ```text
3768    /// +---+---------------+
3769    /// | # | Operands      |
3770    /// +---+---------------+
3771    /// | 1 | Xmm, Xmm, Mem |
3772    /// | 2 | Xmm, Xmm, Xmm |
3773    /// | 3 | Ymm, Ymm, Mem |
3774    /// | 4 | Ymm, Ymm, Ymm |
3775    /// +---+---------------+
3776    /// ```
3777    #[inline]
3778    pub fn vaddsubpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
3779    where Assembler<'a>: VaddsubpdEmitter<A, B, C> {
3780        <Self as VaddsubpdEmitter<A, B, C>>::vaddsubpd(self, op0, op1, op2);
3781    }
3782    /// `VADDSUBPS` (VADDSUBPS). 
3783    /// Adds odd-numbered single precision floating-point values of the first source operand (second operand) with the corresponding single precision floating-point values from the second source operand (third operand); stores the result in the odd-numbered values of the destination operand (first operand). Subtracts the even-numbered single precision floating-point values from the second source operand from the corresponding single precision floating values in the first source operand; stores the result into the even-numbered values of the destination operand.
3784    ///
3785    ///
3786    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSUBPS.html).
3787    ///
3788    /// Supported operand variants:
3789    ///
3790    /// ```text
3791    /// +---+---------------+
3792    /// | # | Operands      |
3793    /// +---+---------------+
3794    /// | 1 | Xmm, Xmm, Mem |
3795    /// | 2 | Xmm, Xmm, Xmm |
3796    /// | 3 | Ymm, Ymm, Mem |
3797    /// | 4 | Ymm, Ymm, Ymm |
3798    /// +---+---------------+
3799    /// ```
3800    #[inline]
3801    pub fn vaddsubps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
3802    where Assembler<'a>: VaddsubpsEmitter<A, B, C> {
3803        <Self as VaddsubpsEmitter<A, B, C>>::vaddsubps(self, op0, op1, op2);
3804    }
3805    /// `VBLENDPD` (VBLENDPD). 
3806    /// Double-precision floating-point values from the second source operand (third operand) are conditionally merged with values from the first source operand (second operand) and written to the destination operand (first operand). The immediate bits [3:0] determine whether the corresponding double precision floating-point value in the destination is copied from the second source or first source. If a bit in the mask, corresponding to a word, is ”1”, then the double precision floating-point value in the second source operand is copied, else the value in the first source operand is copied.
3807    ///
3808    ///
3809    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDPD.html).
3810    ///
3811    /// Supported operand variants:
3812    ///
3813    /// ```text
3814    /// +---+--------------------+
3815    /// | # | Operands           |
3816    /// +---+--------------------+
3817    /// | 1 | Xmm, Xmm, Mem, Imm |
3818    /// | 2 | Xmm, Xmm, Xmm, Imm |
3819    /// | 3 | Ymm, Ymm, Mem, Imm |
3820    /// | 4 | Ymm, Ymm, Ymm, Imm |
3821    /// +---+--------------------+
3822    /// ```
3823    #[inline]
3824    pub fn vblendpd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3825    where Assembler<'a>: VblendpdEmitter<A, B, C, D> {
3826        <Self as VblendpdEmitter<A, B, C, D>>::vblendpd(self, op0, op1, op2, op3);
3827    }
3828    /// `VBLENDPS` (VBLENDPS). 
3829    /// Packed single precision floating-point values from the second source operand (third operand) are conditionally merged with values from the first source operand (second operand) and written to the destination operand (first operand). The immediate bits [7:0] determine whether the corresponding single precision floating-point value in the destination is copied from the second source or first source. If a bit in the mask, corresponding to a word, is “1”, then the single precision floating-point value in the second source operand is copied, else the value in the first source operand is copied.
3830    ///
3831    ///
3832    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDPS.html).
3833    ///
3834    /// Supported operand variants:
3835    ///
3836    /// ```text
3837    /// +---+--------------------+
3838    /// | # | Operands           |
3839    /// +---+--------------------+
3840    /// | 1 | Xmm, Xmm, Mem, Imm |
3841    /// | 2 | Xmm, Xmm, Xmm, Imm |
3842    /// | 3 | Ymm, Ymm, Mem, Imm |
3843    /// | 4 | Ymm, Ymm, Ymm, Imm |
3844    /// +---+--------------------+
3845    /// ```
3846    #[inline]
3847    pub fn vblendps<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3848    where Assembler<'a>: VblendpsEmitter<A, B, C, D> {
3849        <Self as VblendpsEmitter<A, B, C, D>>::vblendps(self, op0, op1, op2, op3);
3850    }
3851    /// `VBLENDVPD` (VBLENDVPD). 
3852    /// Conditionally copy each quadword data element of double precision floating-point value from the second source operand and the first source operand depending on mask bits defined in the mask register operand. The mask bits are the most significant bit in each quadword element of the mask register.
3853    ///
3854    ///
3855    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDVPD.html).
3856    ///
3857    /// Supported operand variants:
3858    ///
3859    /// ```text
3860    /// +---+--------------------+
3861    /// | # | Operands           |
3862    /// +---+--------------------+
3863    /// | 1 | Xmm, Xmm, Mem, Xmm |
3864    /// | 2 | Xmm, Xmm, Xmm, Xmm |
3865    /// | 3 | Ymm, Ymm, Mem, Ymm |
3866    /// | 4 | Ymm, Ymm, Ymm, Ymm |
3867    /// +---+--------------------+
3868    /// ```
3869    #[inline]
3870    pub fn vblendvpd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3871    where Assembler<'a>: VblendvpdEmitter<A, B, C, D> {
3872        <Self as VblendvpdEmitter<A, B, C, D>>::vblendvpd(self, op0, op1, op2, op3);
3873    }
3874    /// `VBLENDVPS` (VBLENDVPS). 
3875    /// Conditionally copy each dword data element of single precision floating-point value from the second source operand and the first source operand depending on mask bits defined in the mask register operand. The mask bits are the most significant bit in each dword element of the mask register.
3876    ///
3877    ///
3878    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/BLENDVPS.html).
3879    ///
3880    /// Supported operand variants:
3881    ///
3882    /// ```text
3883    /// +---+--------------------+
3884    /// | # | Operands           |
3885    /// +---+--------------------+
3886    /// | 1 | Xmm, Xmm, Mem, Xmm |
3887    /// | 2 | Xmm, Xmm, Xmm, Xmm |
3888    /// | 3 | Ymm, Ymm, Mem, Ymm |
3889    /// | 4 | Ymm, Ymm, Ymm, Ymm |
3890    /// +---+--------------------+
3891    /// ```
3892    #[inline]
3893    pub fn vblendvps<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3894    where Assembler<'a>: VblendvpsEmitter<A, B, C, D> {
3895        <Self as VblendvpsEmitter<A, B, C, D>>::vblendvps(self, op0, op1, op2, op3);
3896    }
3897    /// `VBROADCASTF128` (VBROADCASTF128). 
3898    /// VBROADCASTSD/VBROADCASTSS/VBROADCASTF128 load floating-point values as one tuple from the source operand (second operand) in memory and broadcast to all elements of the destination operand (first operand).
3899    ///
3900    ///
3901    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VBROADCAST.html).
3902    ///
3903    /// Supported operand variants:
3904    ///
3905    /// ```text
3906    /// +---+----------+
3907    /// | # | Operands |
3908    /// +---+----------+
3909    /// | 1 | Ymm, Mem |
3910    /// | 2 | Ymm, Xmm |
3911    /// +---+----------+
3912    /// ```
3913    #[inline]
3914    pub fn vbroadcastf128<A, B>(&mut self, op0: A, op1: B)
3915    where Assembler<'a>: Vbroadcastf128Emitter<A, B> {
3916        <Self as Vbroadcastf128Emitter<A, B>>::vbroadcastf128(self, op0, op1);
3917    }
3918    /// `VCMPPD` (VCMPPD). 
3919    /// Performs a SIMD compare of the packed double precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.
3920    ///
3921    ///
3922    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPD.html).
3923    ///
3924    /// Supported operand variants:
3925    ///
3926    /// ```text
3927    /// +----+---------------------+
3928    /// | #  | Operands            |
3929    /// +----+---------------------+
3930    /// | 1  | KReg, Xmm, Mem, Imm |
3931    /// | 2  | KReg, Xmm, Xmm, Imm |
3932    /// | 3  | KReg, Ymm, Mem, Imm |
3933    /// | 4  | KReg, Ymm, Ymm, Imm |
3934    /// | 5  | KReg, Zmm, Mem, Imm |
3935    /// | 6  | KReg, Zmm, Zmm, Imm |
3936    /// | 7  | Xmm, Xmm, Mem, Imm  |
3937    /// | 8  | Xmm, Xmm, Xmm, Imm  |
3938    /// | 9  | Ymm, Ymm, Mem, Imm  |
3939    /// | 10 | Ymm, Ymm, Ymm, Imm  |
3940    /// +----+---------------------+
3941    /// ```
3942    #[inline]
3943    pub fn vcmppd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3944    where Assembler<'a>: VcmppdEmitter<A, B, C, D> {
3945        <Self as VcmppdEmitter<A, B, C, D>>::vcmppd(self, op0, op1, op2, op3);
3946    }
3947    /// `VCMPPS` (VCMPPS). 
3948    /// Performs a SIMD compare of the packed single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
3949    ///
3950    ///
3951    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
3952    ///
3953    /// Supported operand variants:
3954    ///
3955    /// ```text
3956    /// +----+---------------------+
3957    /// | #  | Operands            |
3958    /// +----+---------------------+
3959    /// | 1  | KReg, Xmm, Mem, Imm |
3960    /// | 2  | KReg, Xmm, Xmm, Imm |
3961    /// | 3  | KReg, Ymm, Mem, Imm |
3962    /// | 4  | KReg, Ymm, Ymm, Imm |
3963    /// | 5  | KReg, Zmm, Mem, Imm |
3964    /// | 6  | KReg, Zmm, Zmm, Imm |
3965    /// | 7  | Xmm, Xmm, Mem, Imm  |
3966    /// | 8  | Xmm, Xmm, Xmm, Imm  |
3967    /// | 9  | Ymm, Ymm, Mem, Imm  |
3968    /// | 10 | Ymm, Ymm, Ymm, Imm  |
3969    /// +----+---------------------+
3970    /// ```
3971    #[inline]
3972    pub fn vcmpps<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3973    where Assembler<'a>: VcmppsEmitter<A, B, C, D> {
3974        <Self as VcmppsEmitter<A, B, C, D>>::vcmpps(self, op0, op1, op2, op3);
3975    }
3976    /// `VCMPSD`.
3977    ///
3978    /// Supported operand variants:
3979    ///
3980    /// ```text
3981    /// +---+---------------------+
3982    /// | # | Operands            |
3983    /// +---+---------------------+
3984    /// | 1 | KReg, Xmm, Mem, Imm |
3985    /// | 2 | KReg, Xmm, Xmm, Imm |
3986    /// | 3 | Xmm, Xmm, Mem, Imm  |
3987    /// | 4 | Xmm, Xmm, Xmm, Imm  |
3988    /// +---+---------------------+
3989    /// ```
3990    #[inline]
3991    pub fn vcmpsd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
3992    where Assembler<'a>: VcmpsdEmitter<A, B, C, D> {
3993        <Self as VcmpsdEmitter<A, B, C, D>>::vcmpsd(self, op0, op1, op2, op3);
3994    }
3995    /// `VCMPSS` (VCMPSS). 
3996    /// Compares the low single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
3997    ///
3998    ///
3999    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
4000    ///
4001    /// Supported operand variants:
4002    ///
4003    /// ```text
4004    /// +---+---------------------+
4005    /// | # | Operands            |
4006    /// +---+---------------------+
4007    /// | 1 | KReg, Xmm, Mem, Imm |
4008    /// | 2 | KReg, Xmm, Xmm, Imm |
4009    /// | 3 | Xmm, Xmm, Mem, Imm  |
4010    /// | 4 | Xmm, Xmm, Xmm, Imm  |
4011    /// +---+---------------------+
4012    /// ```
4013    #[inline]
4014    pub fn vcmpss<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4015    where Assembler<'a>: VcmpssEmitter<A, B, C, D> {
4016        <Self as VcmpssEmitter<A, B, C, D>>::vcmpss(self, op0, op1, op2, op3);
4017    }
4018    /// `VDPPD` (VDPPD). 
4019    /// Conditionally multiplies the packed double precision floating-point values in the destination operand (first operand) with the packed double precision floating-point values in the source (second operand) depending on a mask extracted from bits [5:4] of the immediate operand (third operand). If a condition mask bit is zero, the corresponding multiplication is replaced by a value of 0.0 in the manner described by Section 12.8.4 of Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
4020    ///
4021    ///
4022    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DPPD.html).
4023    ///
4024    /// Supported operand variants:
4025    ///
4026    /// ```text
4027    /// +---+--------------------+
4028    /// | # | Operands           |
4029    /// +---+--------------------+
4030    /// | 1 | Xmm, Xmm, Mem, Imm |
4031    /// | 2 | Xmm, Xmm, Xmm, Imm |
4032    /// +---+--------------------+
4033    /// ```
4034    #[inline]
4035    pub fn vdppd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4036    where Assembler<'a>: VdppdEmitter<A, B, C, D> {
4037        <Self as VdppdEmitter<A, B, C, D>>::vdppd(self, op0, op1, op2, op3);
4038    }
4039    /// `VDPPS` (VDPPS). 
4040    /// Conditionally multiplies the packed single precision floating-point values in the destination operand (first operand) with the packed single precision floats in the source (second operand) depending on a mask extracted from the high 4 bits of the immediate byte (third operand). If a condition mask bit in imm8[7:4] is zero, the corresponding multiplication is replaced by a value of 0.0 in the manner described by Section 12.8.4 of Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
4041    ///
4042    ///
4043    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DPPS.html).
4044    ///
4045    /// Supported operand variants:
4046    ///
4047    /// ```text
4048    /// +---+--------------------+
4049    /// | # | Operands           |
4050    /// +---+--------------------+
4051    /// | 1 | Xmm, Xmm, Mem, Imm |
4052    /// | 2 | Xmm, Xmm, Xmm, Imm |
4053    /// | 3 | Ymm, Ymm, Mem, Imm |
4054    /// | 4 | Ymm, Ymm, Ymm, Imm |
4055    /// +---+--------------------+
4056    /// ```
4057    #[inline]
4058    pub fn vdpps<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4059    where Assembler<'a>: VdppsEmitter<A, B, C, D> {
4060        <Self as VdppsEmitter<A, B, C, D>>::vdpps(self, op0, op1, op2, op3);
4061    }
4062    /// `VEXTRACTF128` (VEXTRACTF128). 
4063    /// VEXTRACTF128/VEXTRACTF32x4 and VEXTRACTF64x2 extract 128-bits of single precision floating-point values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at an 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or an 128-bit memory location.
4064    ///
4065    ///
4066    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTF128%3AVEXTRACTF32x4%3AVEXTRACTF64x2%3AVEXTRACTF32x8%3AVEXTRACTF64x4.html).
4067    ///
4068    /// Supported operand variants:
4069    ///
4070    /// ```text
4071    /// +---+---------------+
4072    /// | # | Operands      |
4073    /// +---+---------------+
4074    /// | 1 | Mem, Ymm, Imm |
4075    /// | 2 | Xmm, Ymm, Imm |
4076    /// +---+---------------+
4077    /// ```
4078    #[inline]
4079    pub fn vextractf128<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4080    where Assembler<'a>: Vextractf128Emitter<A, B, C> {
4081        <Self as Vextractf128Emitter<A, B, C>>::vextractf128(self, op0, op1, op2);
4082    }
4083    /// `VHADDPD` (VHADDPD). 
4084    /// Adds the double precision floating-point values in the high and low quadwords of the destination operand and stores the result in the low quadword of the destination operand.
4085    ///
4086    ///
4087    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HADDPD.html).
4088    ///
4089    /// Supported operand variants:
4090    ///
4091    /// ```text
4092    /// +---+---------------+
4093    /// | # | Operands      |
4094    /// +---+---------------+
4095    /// | 1 | Xmm, Xmm, Mem |
4096    /// | 2 | Xmm, Xmm, Xmm |
4097    /// | 3 | Ymm, Ymm, Mem |
4098    /// | 4 | Ymm, Ymm, Ymm |
4099    /// +---+---------------+
4100    /// ```
4101    #[inline]
4102    pub fn vhaddpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4103    where Assembler<'a>: VhaddpdEmitter<A, B, C> {
4104        <Self as VhaddpdEmitter<A, B, C>>::vhaddpd(self, op0, op1, op2);
4105    }
4106    /// `VHADDPS` (VHADDPS). 
4107    /// Adds the single precision floating-point values in the first and second dwords of the destination operand and stores the result in the first dword of the destination operand.
4108    ///
4109    ///
4110    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HADDPS.html).
4111    ///
4112    /// Supported operand variants:
4113    ///
4114    /// ```text
4115    /// +---+---------------+
4116    /// | # | Operands      |
4117    /// +---+---------------+
4118    /// | 1 | Xmm, Xmm, Mem |
4119    /// | 2 | Xmm, Xmm, Xmm |
4120    /// | 3 | Ymm, Ymm, Mem |
4121    /// | 4 | Ymm, Ymm, Ymm |
4122    /// +---+---------------+
4123    /// ```
4124    #[inline]
4125    pub fn vhaddps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4126    where Assembler<'a>: VhaddpsEmitter<A, B, C> {
4127        <Self as VhaddpsEmitter<A, B, C>>::vhaddps(self, op0, op1, op2);
4128    }
4129    /// `VHSUBPD` (VHSUBPD). 
4130    /// The HSUBPD instruction subtracts horizontally the packed double precision floating-point numbers of both operands.
4131    ///
4132    ///
4133    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HSUBPD.html).
4134    ///
4135    /// Supported operand variants:
4136    ///
4137    /// ```text
4138    /// +---+---------------+
4139    /// | # | Operands      |
4140    /// +---+---------------+
4141    /// | 1 | Xmm, Xmm, Mem |
4142    /// | 2 | Xmm, Xmm, Xmm |
4143    /// | 3 | Ymm, Ymm, Mem |
4144    /// | 4 | Ymm, Ymm, Ymm |
4145    /// +---+---------------+
4146    /// ```
4147    #[inline]
4148    pub fn vhsubpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4149    where Assembler<'a>: VhsubpdEmitter<A, B, C> {
4150        <Self as VhsubpdEmitter<A, B, C>>::vhsubpd(self, op0, op1, op2);
4151    }
4152    /// `VHSUBPS` (VHSUBPS). 
4153    /// Subtracts the single precision floating-point value in the second dword of the destination operand from the first dword of the destination operand and stores the result in the first dword of the destination operand.
4154    ///
4155    ///
4156    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/HSUBPS.html).
4157    ///
4158    /// Supported operand variants:
4159    ///
4160    /// ```text
4161    /// +---+---------------+
4162    /// | # | Operands      |
4163    /// +---+---------------+
4164    /// | 1 | Xmm, Xmm, Mem |
4165    /// | 2 | Xmm, Xmm, Xmm |
4166    /// | 3 | Ymm, Ymm, Mem |
4167    /// | 4 | Ymm, Ymm, Ymm |
4168    /// +---+---------------+
4169    /// ```
4170    #[inline]
4171    pub fn vhsubps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4172    where Assembler<'a>: VhsubpsEmitter<A, B, C> {
4173        <Self as VhsubpsEmitter<A, B, C>>::vhsubps(self, op0, op1, op2);
4174    }
4175    /// `VINSERTF128` (VINSERTF128). 
4176    /// VINSERTF128/VINSERTF32x4 and VINSERTF64x2 insert 128-bits of packed floating-point values from the second source operand (the third operand) into the destination operand (the first operand) at an 128-bit granularity offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination operand are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The destination and first source operands are vector registers.
4177    ///
4178    ///
4179    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTF128%3AVINSERTF32x4%3AVINSERTF64x2%3AVINSERTF32x8%3AVINSERTF64x4.html).
4180    ///
4181    /// Supported operand variants:
4182    ///
4183    /// ```text
4184    /// +---+--------------------+
4185    /// | # | Operands           |
4186    /// +---+--------------------+
4187    /// | 1 | Ymm, Ymm, Mem, Imm |
4188    /// | 2 | Ymm, Ymm, Xmm, Imm |
4189    /// +---+--------------------+
4190    /// ```
4191    #[inline]
4192    pub fn vinsertf128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4193    where Assembler<'a>: Vinsertf128Emitter<A, B, C, D> {
4194        <Self as Vinsertf128Emitter<A, B, C, D>>::vinsertf128(self, op0, op1, op2, op3);
4195    }
4196    /// `VLDDQU` (VLDDQU). 
4197    /// The instruction is functionally similar to (V)MOVDQU ymm/xmm, m256/m128 for loading from memory. That is: 32/16 bytes of data starting at an address specified by the source memory operand (second operand) are fetched from memory and placed in a destination register (first operand). The source operand need not be aligned on a 32/16-byte boundary. Up to 64/32 bytes may be loaded from memory; this is implementation dependent.
4198    ///
4199    ///
4200    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDDQU.html).
4201    ///
4202    /// Supported operand variants:
4203    ///
4204    /// ```text
4205    /// +---+----------+
4206    /// | # | Operands |
4207    /// +---+----------+
4208    /// | 1 | Xmm, Mem |
4209    /// | 2 | Ymm, Mem |
4210    /// +---+----------+
4211    /// ```
4212    #[inline]
4213    pub fn vlddqu<A, B>(&mut self, op0: A, op1: B)
4214    where Assembler<'a>: VlddquEmitter<A, B> {
4215        <Self as VlddquEmitter<A, B>>::vlddqu(self, op0, op1);
4216    }
4217    /// `VLDMXCSR` (VLDMXCSR). 
4218    /// Loads the source operand into the MXCSR control/status register. The source operand is a 32-bit memory location. See “MXCSR Control and Status Register” in Chapter 10, of the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for a description of the MXCSR register and its contents.
4219    ///
4220    ///
4221    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
4222    ///
4223    /// Supported operand variants:
4224    ///
4225    /// ```text
4226    /// +---+----------+
4227    /// | # | Operands |
4228    /// +---+----------+
4229    /// | 1 | Mem      |
4230    /// +---+----------+
4231    /// ```
4232    #[inline]
4233    pub fn vldmxcsr<A>(&mut self, op0: A)
4234    where Assembler<'a>: VldmxcsrEmitter<A> {
4235        <Self as VldmxcsrEmitter<A>>::vldmxcsr(self, op0);
4236    }
4237    /// `VMASKMOVDQU` (VMASKMOVDQU). 
4238    /// Stores selected bytes from the source operand (first operand) into an 128-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are XMM registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
4239    ///
4240    ///
4241    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVDQU.html).
4242    ///
4243    /// Supported operand variants:
4244    ///
4245    /// ```text
4246    /// +---+----------+
4247    /// | # | Operands |
4248    /// +---+----------+
4249    /// | 1 | Xmm, Xmm |
4250    /// +---+----------+
4251    /// ```
4252    #[inline]
4253    pub fn vmaskmovdqu<A, B>(&mut self, op0: A, op1: B)
4254    where Assembler<'a>: VmaskmovdquEmitter<A, B> {
4255        <Self as VmaskmovdquEmitter<A, B>>::vmaskmovdqu(self, op0, op1);
4256    }
4257    /// `VMASKMOVPD` (VMASKMOVPD). 
4258    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
4259    ///
4260    ///
4261    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VMASKMOV.html).
4262    ///
4263    /// Supported operand variants:
4264    ///
4265    /// ```text
4266    /// +---+---------------+
4267    /// | # | Operands      |
4268    /// +---+---------------+
4269    /// | 1 | Mem, Xmm, Xmm |
4270    /// | 2 | Mem, Ymm, Ymm |
4271    /// | 3 | Xmm, Xmm, Mem |
4272    /// | 4 | Ymm, Ymm, Mem |
4273    /// +---+---------------+
4274    /// ```
4275    #[inline]
4276    pub fn vmaskmovpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4277    where Assembler<'a>: VmaskmovpdEmitter<A, B, C> {
4278        <Self as VmaskmovpdEmitter<A, B, C>>::vmaskmovpd(self, op0, op1, op2);
4279    }
4280    /// `VMASKMOVPS` (VMASKMOVPS). 
4281    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
4282    ///
4283    ///
4284    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VMASKMOV.html).
4285    ///
4286    /// Supported operand variants:
4287    ///
4288    /// ```text
4289    /// +---+---------------+
4290    /// | # | Operands      |
4291    /// +---+---------------+
4292    /// | 1 | Mem, Xmm, Xmm |
4293    /// | 2 | Mem, Ymm, Ymm |
4294    /// | 3 | Xmm, Xmm, Mem |
4295    /// | 4 | Ymm, Ymm, Mem |
4296    /// +---+---------------+
4297    /// ```
4298    #[inline]
4299    pub fn vmaskmovps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4300    where Assembler<'a>: VmaskmovpsEmitter<A, B, C> {
4301        <Self as VmaskmovpsEmitter<A, B, C>>::vmaskmovps(self, op0, op1, op2);
4302    }
4303    /// `VMOVD` (VMOVD). 
4304    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
4305    ///
4306    ///
4307    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
4308    ///
4309    /// Supported operand variants:
4310    ///
4311    /// ```text
4312    /// +---+----------+
4313    /// | # | Operands |
4314    /// +---+----------+
4315    /// | 1 | Mem, Xmm |
4316    /// | 2 | Xmm, Mem |
4317    /// +---+----------+
4318    /// ```
4319    #[inline]
4320    pub fn vmovd<A, B>(&mut self, op0: A, op1: B)
4321    where Assembler<'a>: VmovdEmitter<A, B> {
4322        <Self as VmovdEmitter<A, B>>::vmovd(self, op0, op1);
4323    }
4324    /// `VMOVDQA` (VMOVDQA). 
4325    /// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
4326    ///
4327    ///
4328    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQA%3AVMOVDQA32%3AVMOVDQA64.html).
4329    ///
4330    /// Supported operand variants:
4331    ///
4332    /// ```text
4333    /// +---+----------+
4334    /// | # | Operands |
4335    /// +---+----------+
4336    /// | 1 | Mem, Xmm |
4337    /// | 2 | Mem, Ymm |
4338    /// | 3 | Xmm, Mem |
4339    /// | 4 | Xmm, Xmm |
4340    /// | 5 | Ymm, Mem |
4341    /// | 6 | Ymm, Ymm |
4342    /// +---+----------+
4343    /// ```
4344    #[inline]
4345    pub fn vmovdqa<A, B>(&mut self, op0: A, op1: B)
4346    where Assembler<'a>: VmovdqaEmitter<A, B> {
4347        <Self as VmovdqaEmitter<A, B>>::vmovdqa(self, op0, op1);
4348    }
4349    /// `VMOVDQU` (VMOVDQU). 
4350    /// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
4351    ///
4352    ///
4353    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQU%3AVMOVDQU8%3AVMOVDQU16%3AVMOVDQU32%3AVMOVDQU64.html).
4354    ///
4355    /// Supported operand variants:
4356    ///
4357    /// ```text
4358    /// +---+----------+
4359    /// | # | Operands |
4360    /// +---+----------+
4361    /// | 1 | Mem, Xmm |
4362    /// | 2 | Mem, Ymm |
4363    /// | 3 | Xmm, Mem |
4364    /// | 4 | Xmm, Xmm |
4365    /// | 5 | Ymm, Mem |
4366    /// | 6 | Ymm, Ymm |
4367    /// +---+----------+
4368    /// ```
4369    #[inline]
4370    pub fn vmovdqu<A, B>(&mut self, op0: A, op1: B)
4371    where Assembler<'a>: VmovdquEmitter<A, B> {
4372        <Self as VmovdquEmitter<A, B>>::vmovdqu(self, op0, op1);
4373    }
4374    /// `VMOVD_G2X` (VMOVD). 
4375    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
4376    ///
4377    ///
4378    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
4379    ///
4380    /// Supported operand variants:
4381    ///
4382    /// ```text
4383    /// +---+----------+
4384    /// | # | Operands |
4385    /// +---+----------+
4386    /// | 1 | Xmm, Gpd |
4387    /// +---+----------+
4388    /// ```
4389    #[inline]
4390    pub fn vmovd_g2x<A, B>(&mut self, op0: A, op1: B)
4391    where Assembler<'a>: VmovdG2xEmitter<A, B> {
4392        <Self as VmovdG2xEmitter<A, B>>::vmovd_g2x(self, op0, op1);
4393    }
4394    /// `VMOVD_X2G` (VMOVD). 
4395    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
4396    ///
4397    ///
4398    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
4399    ///
4400    /// Supported operand variants:
4401    ///
4402    /// ```text
4403    /// +---+----------+
4404    /// | # | Operands |
4405    /// +---+----------+
4406    /// | 1 | Gpd, Xmm |
4407    /// +---+----------+
4408    /// ```
4409    #[inline]
4410    pub fn vmovd_x2g<A, B>(&mut self, op0: A, op1: B)
4411    where Assembler<'a>: VmovdX2gEmitter<A, B> {
4412        <Self as VmovdX2gEmitter<A, B>>::vmovd_x2g(self, op0, op1);
4413    }
4414    /// `VMOVMSKPD` (VMOVMSKPD). 
4415    /// Extracts the sign bits from the packed double precision floating-point values in the source operand (second operand), formats them into a 2-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM register, and the destination operand is a general-purpose register. The mask is stored in the 2 low-order bits of the destination operand. Zero-extend the upper bits of the destination.
4416    ///
4417    ///
4418    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPD.html).
4419    ///
4420    /// Supported operand variants:
4421    ///
4422    /// ```text
4423    /// +---+----------+
4424    /// | # | Operands |
4425    /// +---+----------+
4426    /// | 1 | Gpd, Xmm |
4427    /// | 2 | Gpd, Ymm |
4428    /// +---+----------+
4429    /// ```
4430    #[inline]
4431    pub fn vmovmskpd<A, B>(&mut self, op0: A, op1: B)
4432    where Assembler<'a>: VmovmskpdEmitter<A, B> {
4433        <Self as VmovmskpdEmitter<A, B>>::vmovmskpd(self, op0, op1);
4434    }
4435    /// `VMOVMSKPS` (VMOVMSKPS). 
4436    /// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
4437    ///
4438    ///
4439    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
4440    ///
4441    /// Supported operand variants:
4442    ///
4443    /// ```text
4444    /// +---+----------+
4445    /// | # | Operands |
4446    /// +---+----------+
4447    /// | 1 | Gpd, Xmm |
4448    /// | 2 | Gpd, Ymm |
4449    /// +---+----------+
4450    /// ```
4451    #[inline]
4452    pub fn vmovmskps<A, B>(&mut self, op0: A, op1: B)
4453    where Assembler<'a>: VmovmskpsEmitter<A, B> {
4454        <Self as VmovmskpsEmitter<A, B>>::vmovmskps(self, op0, op1);
4455    }
4456    /// `VMOVQ_G2X` (VMOVQ). 
4457    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
4458    ///
4459    ///
4460    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
4461    ///
4462    /// Supported operand variants:
4463    ///
4464    /// ```text
4465    /// +---+----------+
4466    /// | # | Operands |
4467    /// +---+----------+
4468    /// | 1 | Xmm, Gpd |
4469    /// | 2 | Xmm, Gpq |
4470    /// | 3 | Xmm, Mem |
4471    /// +---+----------+
4472    /// ```
4473    #[inline]
4474    pub fn vmovq_g2x<A, B>(&mut self, op0: A, op1: B)
4475    where Assembler<'a>: VmovqG2xEmitter<A, B> {
4476        <Self as VmovqG2xEmitter<A, B>>::vmovq_g2x(self, op0, op1);
4477    }
4478    /// `VMOVQ_X2G` (VMOVQ). 
4479    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
4480    ///
4481    ///
4482    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
4483    ///
4484    /// Supported operand variants:
4485    ///
4486    /// ```text
4487    /// +---+----------+
4488    /// | # | Operands |
4489    /// +---+----------+
4490    /// | 1 | Gpd, Xmm |
4491    /// | 2 | Gpq, Xmm |
4492    /// | 3 | Mem, Xmm |
4493    /// +---+----------+
4494    /// ```
4495    #[inline]
4496    pub fn vmovq_x2g<A, B>(&mut self, op0: A, op1: B)
4497    where Assembler<'a>: VmovqX2gEmitter<A, B> {
4498        <Self as VmovqX2gEmitter<A, B>>::vmovq_x2g(self, op0, op1);
4499    }
4500    /// `VMPSADBW` (VMPSADBW). 
4501    /// (V)MPSADBW calculates packed word results of sum-absolute-difference (SAD) of unsigned bytes from two blocks of 32-bit dword elements, using two select fields in the immediate byte to select the offsets of the two blocks within the first source operand and the second operand. Packed SAD word results are calculated within each 128-bit lane. Each SAD word result is calculated between a stationary block_2 (whose offset within the second source operand is selected by a two bit select control, multiplied by 32 bits) and a sliding block_1 at consecutive byte-granular position within the first source operand. The offset of the first 32-bit block of block_1 is selectable using a one bit select control, multiplied by 32 bits.
4502    ///
4503    ///
4504    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MPSADBW.html).
4505    ///
4506    /// Supported operand variants:
4507    ///
4508    /// ```text
4509    /// +---+--------------------+
4510    /// | # | Operands           |
4511    /// +---+--------------------+
4512    /// | 1 | Xmm, Xmm, Mem, Imm |
4513    /// | 2 | Xmm, Xmm, Xmm, Imm |
4514    /// | 3 | Ymm, Ymm, Mem, Imm |
4515    /// | 4 | Ymm, Ymm, Ymm, Imm |
4516    /// +---+--------------------+
4517    /// ```
4518    #[inline]
4519    pub fn vmpsadbw<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4520    where Assembler<'a>: VmpsadbwEmitter<A, B, C, D> {
4521        <Self as VmpsadbwEmitter<A, B, C, D>>::vmpsadbw(self, op0, op1, op2, op3);
4522    }
4523    /// `VPAND` (VPAND). 
4524    /// Performs a bitwise logical AND operation on the first source operand and second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bits of the first and second operands are 1, otherwise it is set to 0.
4525    ///
4526    ///
4527    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAND.html).
4528    ///
4529    /// Supported operand variants:
4530    ///
4531    /// ```text
4532    /// +---+---------------+
4533    /// | # | Operands      |
4534    /// +---+---------------+
4535    /// | 1 | Xmm, Xmm, Mem |
4536    /// | 2 | Xmm, Xmm, Xmm |
4537    /// | 3 | Ymm, Ymm, Mem |
4538    /// | 4 | Ymm, Ymm, Ymm |
4539    /// +---+---------------+
4540    /// ```
4541    #[inline]
4542    pub fn vpand<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4543    where Assembler<'a>: VpandEmitter<A, B, C> {
4544        <Self as VpandEmitter<A, B, C>>::vpand(self, op0, op1, op2);
4545    }
4546    /// `VPANDN` (VPANDN). 
4547    /// Performs a bitwise logical NOT operation on the first source operand, then performs bitwise AND with second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bit in the first operand is 0 and the corresponding bit in the second operand is 1, otherwise it is set to 0.
4548    ///
4549    ///
4550    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PANDN.html).
4551    ///
4552    /// Supported operand variants:
4553    ///
4554    /// ```text
4555    /// +---+---------------+
4556    /// | # | Operands      |
4557    /// +---+---------------+
4558    /// | 1 | Xmm, Xmm, Mem |
4559    /// | 2 | Xmm, Xmm, Xmm |
4560    /// | 3 | Ymm, Ymm, Mem |
4561    /// | 4 | Ymm, Ymm, Ymm |
4562    /// +---+---------------+
4563    /// ```
4564    #[inline]
4565    pub fn vpandn<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4566    where Assembler<'a>: VpandnEmitter<A, B, C> {
4567        <Self as VpandnEmitter<A, B, C>>::vpandn(self, op0, op1, op2);
4568    }
4569    /// `VPBLENDVB` (VPBLENDVB). 
4570    /// Conditionally copies byte elements from the source operand (second operand) to the destination operand (first operand) depending on mask bits defined in the implicit third register argument, XMM0. The mask bits are the most significant bit in each byte element of the XMM0 register.
4571    ///
4572    ///
4573    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PBLENDVB.html).
4574    ///
4575    /// Supported operand variants:
4576    ///
4577    /// ```text
4578    /// +---+--------------------+
4579    /// | # | Operands           |
4580    /// +---+--------------------+
4581    /// | 1 | Xmm, Xmm, Mem, Xmm |
4582    /// | 2 | Xmm, Xmm, Xmm, Xmm |
4583    /// | 3 | Ymm, Ymm, Mem, Ymm |
4584    /// | 4 | Ymm, Ymm, Ymm, Ymm |
4585    /// +---+--------------------+
4586    /// ```
4587    #[inline]
4588    pub fn vpblendvb<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4589    where Assembler<'a>: VpblendvbEmitter<A, B, C, D> {
4590        <Self as VpblendvbEmitter<A, B, C, D>>::vpblendvb(self, op0, op1, op2, op3);
4591    }
4592    /// `VPBLENDW` (VPBLENDW). 
4593    /// Words from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding word in the destination is copied from the source. If a bit in the mask, corresponding to a word, is “1", then the word is copied, else the word element in the destination operand is unchanged.
4594    ///
4595    ///
4596    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PBLENDW.html).
4597    ///
4598    /// Supported operand variants:
4599    ///
4600    /// ```text
4601    /// +---+--------------------+
4602    /// | # | Operands           |
4603    /// +---+--------------------+
4604    /// | 1 | Xmm, Xmm, Mem, Imm |
4605    /// | 2 | Xmm, Xmm, Xmm, Imm |
4606    /// | 3 | Ymm, Ymm, Mem, Imm |
4607    /// | 4 | Ymm, Ymm, Ymm, Imm |
4608    /// +---+--------------------+
4609    /// ```
4610    #[inline]
4611    pub fn vpblendw<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4612    where Assembler<'a>: VpblendwEmitter<A, B, C, D> {
4613        <Self as VpblendwEmitter<A, B, C, D>>::vpblendw(self, op0, op1, op2, op3);
4614    }
4615    /// `VPCMPEQB` (VPCMPEQB). 
4616    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4617    ///
4618    ///
4619    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
4620    ///
4621    /// Supported operand variants:
4622    ///
4623    /// ```text
4624    /// +----+----------------+
4625    /// | #  | Operands       |
4626    /// +----+----------------+
4627    /// | 1  | KReg, Xmm, Mem |
4628    /// | 2  | KReg, Xmm, Xmm |
4629    /// | 3  | KReg, Ymm, Mem |
4630    /// | 4  | KReg, Ymm, Ymm |
4631    /// | 5  | KReg, Zmm, Mem |
4632    /// | 6  | KReg, Zmm, Zmm |
4633    /// | 7  | Xmm, Xmm, Mem  |
4634    /// | 8  | Xmm, Xmm, Xmm  |
4635    /// | 9  | Ymm, Ymm, Mem  |
4636    /// | 10 | Ymm, Ymm, Ymm  |
4637    /// +----+----------------+
4638    /// ```
4639    #[inline]
4640    pub fn vpcmpeqb<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4641    where Assembler<'a>: VpcmpeqbEmitter<A, B, C> {
4642        <Self as VpcmpeqbEmitter<A, B, C>>::vpcmpeqb(self, op0, op1, op2);
4643    }
4644    /// `VPCMPEQD` (VPCMPEQD). 
4645    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4646    ///
4647    ///
4648    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
4649    ///
4650    /// Supported operand variants:
4651    ///
4652    /// ```text
4653    /// +----+----------------+
4654    /// | #  | Operands       |
4655    /// +----+----------------+
4656    /// | 1  | KReg, Xmm, Mem |
4657    /// | 2  | KReg, Xmm, Xmm |
4658    /// | 3  | KReg, Ymm, Mem |
4659    /// | 4  | KReg, Ymm, Ymm |
4660    /// | 5  | KReg, Zmm, Mem |
4661    /// | 6  | KReg, Zmm, Zmm |
4662    /// | 7  | Xmm, Xmm, Mem  |
4663    /// | 8  | Xmm, Xmm, Xmm  |
4664    /// | 9  | Ymm, Ymm, Mem  |
4665    /// | 10 | Ymm, Ymm, Ymm  |
4666    /// +----+----------------+
4667    /// ```
4668    #[inline]
4669    pub fn vpcmpeqd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4670    where Assembler<'a>: VpcmpeqdEmitter<A, B, C> {
4671        <Self as VpcmpeqdEmitter<A, B, C>>::vpcmpeqd(self, op0, op1, op2);
4672    }
4673    /// `VPCMPEQQ` (VPCMPEQQ). 
4674    /// Performs an SIMD compare for equality of the packed quadwords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination is set to all 1s; otherwise, it is set to 0s.
4675    ///
4676    ///
4677    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQQ.html).
4678    ///
4679    /// Supported operand variants:
4680    ///
4681    /// ```text
4682    /// +----+----------------+
4683    /// | #  | Operands       |
4684    /// +----+----------------+
4685    /// | 1  | KReg, Xmm, Mem |
4686    /// | 2  | KReg, Xmm, Xmm |
4687    /// | 3  | KReg, Ymm, Mem |
4688    /// | 4  | KReg, Ymm, Ymm |
4689    /// | 5  | KReg, Zmm, Mem |
4690    /// | 6  | KReg, Zmm, Zmm |
4691    /// | 7  | Xmm, Xmm, Mem  |
4692    /// | 8  | Xmm, Xmm, Xmm  |
4693    /// | 9  | Ymm, Ymm, Mem  |
4694    /// | 10 | Ymm, Ymm, Ymm  |
4695    /// +----+----------------+
4696    /// ```
4697    #[inline]
4698    pub fn vpcmpeqq<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4699    where Assembler<'a>: VpcmpeqqEmitter<A, B, C> {
4700        <Self as VpcmpeqqEmitter<A, B, C>>::vpcmpeqq(self, op0, op1, op2);
4701    }
4702    /// `VPCMPEQW` (VPCMPEQW). 
4703    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4704    ///
4705    ///
4706    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
4707    ///
4708    /// Supported operand variants:
4709    ///
4710    /// ```text
4711    /// +----+----------------+
4712    /// | #  | Operands       |
4713    /// +----+----------------+
4714    /// | 1  | KReg, Xmm, Mem |
4715    /// | 2  | KReg, Xmm, Xmm |
4716    /// | 3  | KReg, Ymm, Mem |
4717    /// | 4  | KReg, Ymm, Ymm |
4718    /// | 5  | KReg, Zmm, Mem |
4719    /// | 6  | KReg, Zmm, Zmm |
4720    /// | 7  | Xmm, Xmm, Mem  |
4721    /// | 8  | Xmm, Xmm, Xmm  |
4722    /// | 9  | Ymm, Ymm, Mem  |
4723    /// | 10 | Ymm, Ymm, Ymm  |
4724    /// +----+----------------+
4725    /// ```
4726    #[inline]
4727    pub fn vpcmpeqw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4728    where Assembler<'a>: VpcmpeqwEmitter<A, B, C> {
4729        <Self as VpcmpeqwEmitter<A, B, C>>::vpcmpeqw(self, op0, op1, op2);
4730    }
4731    /// `VPCMPESTRI` (VPCMPESTRI). 
4732    /// The instruction compares and processes data from two string fragments based on the encoded value in the imm8 control byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates an index stored to the count register (ECX).
4733    ///
4734    ///
4735    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPESTRI.html).
4736    ///
4737    /// Supported operand variants:
4738    ///
4739    /// ```text
4740    /// +---+---------------+
4741    /// | # | Operands      |
4742    /// +---+---------------+
4743    /// | 1 | Xmm, Mem, Imm |
4744    /// | 2 | Xmm, Xmm, Imm |
4745    /// +---+---------------+
4746    /// ```
4747    #[inline]
4748    pub fn vpcmpestri<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4749    where Assembler<'a>: VpcmpestriEmitter<A, B, C> {
4750        <Self as VpcmpestriEmitter<A, B, C>>::vpcmpestri(self, op0, op1, op2);
4751    }
4752    /// `VPCMPESTRM` (VPCMPESTRM). 
4753    /// The instruction compares data from two string fragments based on the encoded value in the imm8 contol byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates a mask stored to XMM0.
4754    ///
4755    ///
4756    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPESTRM.html).
4757    ///
4758    /// Supported operand variants:
4759    ///
4760    /// ```text
4761    /// +---+---------------+
4762    /// | # | Operands      |
4763    /// +---+---------------+
4764    /// | 1 | Xmm, Mem, Imm |
4765    /// | 2 | Xmm, Xmm, Imm |
4766    /// +---+---------------+
4767    /// ```
4768    #[inline]
4769    pub fn vpcmpestrm<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4770    where Assembler<'a>: VpcmpestrmEmitter<A, B, C> {
4771        <Self as VpcmpestrmEmitter<A, B, C>>::vpcmpestrm(self, op0, op1, op2);
4772    }
4773    /// `VPCMPGTB` (VPCMPGTB). 
4774    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4775    ///
4776    ///
4777    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
4778    ///
4779    /// Supported operand variants:
4780    ///
4781    /// ```text
4782    /// +----+----------------+
4783    /// | #  | Operands       |
4784    /// +----+----------------+
4785    /// | 1  | KReg, Xmm, Mem |
4786    /// | 2  | KReg, Xmm, Xmm |
4787    /// | 3  | KReg, Ymm, Mem |
4788    /// | 4  | KReg, Ymm, Ymm |
4789    /// | 5  | KReg, Zmm, Mem |
4790    /// | 6  | KReg, Zmm, Zmm |
4791    /// | 7  | Xmm, Xmm, Mem  |
4792    /// | 8  | Xmm, Xmm, Xmm  |
4793    /// | 9  | Ymm, Ymm, Mem  |
4794    /// | 10 | Ymm, Ymm, Ymm  |
4795    /// +----+----------------+
4796    /// ```
4797    #[inline]
4798    pub fn vpcmpgtb<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4799    where Assembler<'a>: VpcmpgtbEmitter<A, B, C> {
4800        <Self as VpcmpgtbEmitter<A, B, C>>::vpcmpgtb(self, op0, op1, op2);
4801    }
4802    /// `VPCMPGTD` (VPCMPGTD). 
4803    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4804    ///
4805    ///
4806    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
4807    ///
4808    /// Supported operand variants:
4809    ///
4810    /// ```text
4811    /// +----+----------------+
4812    /// | #  | Operands       |
4813    /// +----+----------------+
4814    /// | 1  | KReg, Xmm, Mem |
4815    /// | 2  | KReg, Xmm, Xmm |
4816    /// | 3  | KReg, Ymm, Mem |
4817    /// | 4  | KReg, Ymm, Ymm |
4818    /// | 5  | KReg, Zmm, Mem |
4819    /// | 6  | KReg, Zmm, Zmm |
4820    /// | 7  | Xmm, Xmm, Mem  |
4821    /// | 8  | Xmm, Xmm, Xmm  |
4822    /// | 9  | Ymm, Ymm, Mem  |
4823    /// | 10 | Ymm, Ymm, Ymm  |
4824    /// +----+----------------+
4825    /// ```
4826    #[inline]
4827    pub fn vpcmpgtd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4828    where Assembler<'a>: VpcmpgtdEmitter<A, B, C> {
4829        <Self as VpcmpgtdEmitter<A, B, C>>::vpcmpgtd(self, op0, op1, op2);
4830    }
4831    /// `VPCMPGTQ` (VPCMPGTQ). 
4832    /// Performs an SIMD signed compare for the packed quadwords in the destination operand (first operand) and the source operand (second operand). If the data element in the first (destination) operand is greater than the corresponding element in the second (source) operand, the corresponding data element in the destination is set to all 1s; otherwise, it is set to 0s.
4833    ///
4834    ///
4835    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTQ.html).
4836    ///
4837    /// Supported operand variants:
4838    ///
4839    /// ```text
4840    /// +----+----------------+
4841    /// | #  | Operands       |
4842    /// +----+----------------+
4843    /// | 1  | KReg, Xmm, Mem |
4844    /// | 2  | KReg, Xmm, Xmm |
4845    /// | 3  | KReg, Ymm, Mem |
4846    /// | 4  | KReg, Ymm, Ymm |
4847    /// | 5  | KReg, Zmm, Mem |
4848    /// | 6  | KReg, Zmm, Zmm |
4849    /// | 7  | Xmm, Xmm, Mem  |
4850    /// | 8  | Xmm, Xmm, Xmm  |
4851    /// | 9  | Ymm, Ymm, Mem  |
4852    /// | 10 | Ymm, Ymm, Ymm  |
4853    /// +----+----------------+
4854    /// ```
4855    #[inline]
4856    pub fn vpcmpgtq<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4857    where Assembler<'a>: VpcmpgtqEmitter<A, B, C> {
4858        <Self as VpcmpgtqEmitter<A, B, C>>::vpcmpgtq(self, op0, op1, op2);
4859    }
4860    /// `VPCMPGTW` (VPCMPGTW). 
4861    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
4862    ///
4863    ///
4864    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
4865    ///
4866    /// Supported operand variants:
4867    ///
4868    /// ```text
4869    /// +----+----------------+
4870    /// | #  | Operands       |
4871    /// +----+----------------+
4872    /// | 1  | KReg, Xmm, Mem |
4873    /// | 2  | KReg, Xmm, Xmm |
4874    /// | 3  | KReg, Ymm, Mem |
4875    /// | 4  | KReg, Ymm, Ymm |
4876    /// | 5  | KReg, Zmm, Mem |
4877    /// | 6  | KReg, Zmm, Zmm |
4878    /// | 7  | Xmm, Xmm, Mem  |
4879    /// | 8  | Xmm, Xmm, Xmm  |
4880    /// | 9  | Ymm, Ymm, Mem  |
4881    /// | 10 | Ymm, Ymm, Ymm  |
4882    /// +----+----------------+
4883    /// ```
4884    #[inline]
4885    pub fn vpcmpgtw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4886    where Assembler<'a>: VpcmpgtwEmitter<A, B, C> {
4887        <Self as VpcmpgtwEmitter<A, B, C>>::vpcmpgtw(self, op0, op1, op2);
4888    }
4889    /// `VPCMPISTRI` (VPCMPISTRI). 
4890    /// The instruction compares data from two strings based on the encoded value in the imm8 control byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”), and generates an index stored to ECX.
4891    ///
4892    ///
4893    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPISTRI.html).
4894    ///
4895    /// Supported operand variants:
4896    ///
4897    /// ```text
4898    /// +---+---------------+
4899    /// | # | Operands      |
4900    /// +---+---------------+
4901    /// | 1 | Xmm, Mem, Imm |
4902    /// | 2 | Xmm, Xmm, Imm |
4903    /// +---+---------------+
4904    /// ```
4905    #[inline]
4906    pub fn vpcmpistri<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4907    where Assembler<'a>: VpcmpistriEmitter<A, B, C> {
4908        <Self as VpcmpistriEmitter<A, B, C>>::vpcmpistri(self, op0, op1, op2);
4909    }
4910    /// `VPCMPISTRM` (VPCMPISTRM). 
4911    /// The instruction compares data from two strings based on the encoded value in the imm8 byte (see Section 4.1, “Imm8 Control Byte Operation for PCMPESTRI / PCMPESTRM / PCMPISTRI / PCMPISTRM”) generating a mask stored to XMM0.
4912    ///
4913    ///
4914    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPISTRM.html).
4915    ///
4916    /// Supported operand variants:
4917    ///
4918    /// ```text
4919    /// +---+---------------+
4920    /// | # | Operands      |
4921    /// +---+---------------+
4922    /// | 1 | Xmm, Mem, Imm |
4923    /// | 2 | Xmm, Xmm, Imm |
4924    /// +---+---------------+
4925    /// ```
4926    #[inline]
4927    pub fn vpcmpistrm<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4928    where Assembler<'a>: VpcmpistrmEmitter<A, B, C> {
4929        <Self as VpcmpistrmEmitter<A, B, C>>::vpcmpistrm(self, op0, op1, op2);
4930    }
4931    /// `VPERM2F128` (VPERM2F128). 
4932    /// Permute 128 bit floating-point-containing fields from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
4933    ///
4934    ///
4935    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2F128.html).
4936    ///
4937    /// Supported operand variants:
4938    ///
4939    /// ```text
4940    /// +---+--------------------+
4941    /// | # | Operands           |
4942    /// +---+--------------------+
4943    /// | 1 | Ymm, Ymm, Mem, Imm |
4944    /// | 2 | Ymm, Ymm, Ymm, Imm |
4945    /// +---+--------------------+
4946    /// ```
4947    #[inline]
4948    pub fn vperm2f128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
4949    where Assembler<'a>: Vperm2f128Emitter<A, B, C, D> {
4950        <Self as Vperm2f128Emitter<A, B, C, D>>::vperm2f128(self, op0, op1, op2, op3);
4951    }
4952    /// `VPEXTRD` (VPEXTRD). 
4953    /// Extract a byte/dword/qword integer value from the source XMM register at a byte/dword/qword offset determined from imm8[3:0]. The destination can be a register or byte/dword/qword memory location. If the destination is a register, the upper bits of the register are zero extended.
4954    ///
4955    ///
4956    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRB%3APEXTRD%3APEXTRQ.html).
4957    ///
4958    /// Supported operand variants:
4959    ///
4960    /// ```text
4961    /// +---+---------------+
4962    /// | # | Operands      |
4963    /// +---+---------------+
4964    /// | 1 | Gpd, Xmm, Imm |
4965    /// | 2 | Mem, Xmm, Imm |
4966    /// +---+---------------+
4967    /// ```
4968    #[inline]
4969    pub fn vpextrd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4970    where Assembler<'a>: VpextrdEmitter<A, B, C> {
4971        <Self as VpextrdEmitter<A, B, C>>::vpextrd(self, op0, op1, op2);
4972    }
4973    /// `VPEXTRQ` (VPEXTRQ). 
4974    /// Extract a byte/dword/qword integer value from the source XMM register at a byte/dword/qword offset determined from imm8[3:0]. The destination can be a register or byte/dword/qword memory location. If the destination is a register, the upper bits of the register are zero extended.
4975    ///
4976    ///
4977    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRB%3APEXTRD%3APEXTRQ.html).
4978    ///
4979    /// Supported operand variants:
4980    ///
4981    /// ```text
4982    /// +---+---------------+
4983    /// | # | Operands      |
4984    /// +---+---------------+
4985    /// | 1 | Gpd, Xmm, Imm |
4986    /// | 2 | Gpq, Xmm, Imm |
4987    /// | 3 | Mem, Xmm, Imm |
4988    /// +---+---------------+
4989    /// ```
4990    #[inline]
4991    pub fn vpextrq<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4992    where Assembler<'a>: VpextrqEmitter<A, B, C> {
4993        <Self as VpextrqEmitter<A, B, C>>::vpextrq(self, op0, op1, op2);
4994    }
4995    /// `VPHADDD` (VPHADDD). 
4996    /// (V)PHADDW adds two adjacent 16-bit signed integers horizontally from the source and destination operands and packs the 16-bit signed results to the destination operand (first operand). (V)PHADDD adds two adjacent 32-bit signed integers horizontally from the source and destination operands and packs the 32-bit signed results to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
4997    ///
4998    ///
4999    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDW%3APHADDD.html).
5000    ///
5001    /// Supported operand variants:
5002    ///
5003    /// ```text
5004    /// +---+---------------+
5005    /// | # | Operands      |
5006    /// +---+---------------+
5007    /// | 1 | Xmm, Xmm, Mem |
5008    /// | 2 | Xmm, Xmm, Xmm |
5009    /// | 3 | Ymm, Ymm, Mem |
5010    /// | 4 | Ymm, Ymm, Ymm |
5011    /// +---+---------------+
5012    /// ```
5013    #[inline]
5014    pub fn vphaddd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5015    where Assembler<'a>: VphadddEmitter<A, B, C> {
5016        <Self as VphadddEmitter<A, B, C>>::vphaddd(self, op0, op1, op2);
5017    }
5018    /// `VPHADDSW` (VPHADDSW). 
5019    /// (V)PHADDSW adds two adjacent signed 16-bit integers horizontally from the source and destination operands and saturates the signed results; packs the signed, saturated 16-bit results to the destination operand (first operand) When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
5020    ///
5021    ///
5022    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDSW.html).
5023    ///
5024    /// Supported operand variants:
5025    ///
5026    /// ```text
5027    /// +---+---------------+
5028    /// | # | Operands      |
5029    /// +---+---------------+
5030    /// | 1 | Xmm, Xmm, Mem |
5031    /// | 2 | Xmm, Xmm, Xmm |
5032    /// | 3 | Ymm, Ymm, Mem |
5033    /// | 4 | Ymm, Ymm, Ymm |
5034    /// +---+---------------+
5035    /// ```
5036    #[inline]
5037    pub fn vphaddsw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5038    where Assembler<'a>: VphaddswEmitter<A, B, C> {
5039        <Self as VphaddswEmitter<A, B, C>>::vphaddsw(self, op0, op1, op2);
5040    }
5041    /// `VPHADDW` (VPHADDW). 
5042    /// (V)PHADDW adds two adjacent 16-bit signed integers horizontally from the source and destination operands and packs the 16-bit signed results to the destination operand (first operand). (V)PHADDD adds two adjacent 32-bit signed integers horizontally from the source and destination operands and packs the 32-bit signed results to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
5043    ///
5044    ///
5045    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHADDW%3APHADDD.html).
5046    ///
5047    /// Supported operand variants:
5048    ///
5049    /// ```text
5050    /// +---+---------------+
5051    /// | # | Operands      |
5052    /// +---+---------------+
5053    /// | 1 | Xmm, Xmm, Mem |
5054    /// | 2 | Xmm, Xmm, Xmm |
5055    /// | 3 | Ymm, Ymm, Mem |
5056    /// | 4 | Ymm, Ymm, Ymm |
5057    /// +---+---------------+
5058    /// ```
5059    #[inline]
5060    pub fn vphaddw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5061    where Assembler<'a>: VphaddwEmitter<A, B, C> {
5062        <Self as VphaddwEmitter<A, B, C>>::vphaddw(self, op0, op1, op2);
5063    }
5064    /// `VPHMINPOSUW` (VPHMINPOSUW). 
5065    /// Determine the minimum unsigned word value in the source operand (second operand) and place the unsigned word in the low word (bits 0-15) of the destination operand (first operand). The word index of the minimum value is stored in bits 16-18 of the destination operand. The remaining upper bits of the destination are set to zero.
5066    ///
5067    ///
5068    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHMINPOSUW.html).
5069    ///
5070    /// Supported operand variants:
5071    ///
5072    /// ```text
5073    /// +---+----------+
5074    /// | # | Operands |
5075    /// +---+----------+
5076    /// | 1 | Xmm, Mem |
5077    /// | 2 | Xmm, Xmm |
5078    /// +---+----------+
5079    /// ```
5080    #[inline]
5081    pub fn vphminposuw<A, B>(&mut self, op0: A, op1: B)
5082    where Assembler<'a>: VphminposuwEmitter<A, B> {
5083        <Self as VphminposuwEmitter<A, B>>::vphminposuw(self, op0, op1);
5084    }
5085    /// `VPHSUBD` (VPHSUBD). 
5086    /// (V)PHSUBW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands, and packs the signed 16-bit results to the destination operand (first operand). (V)PHSUBD performs horizontal subtraction on each adjacent pair of 32-bit signed integers by subtracting the most significant doubleword from the least significant doubleword of each pair, and packs the signed 32-bit result to the destination operand. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
5087    ///
5088    ///
5089    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBW%3APHSUBD.html).
5090    ///
5091    /// Supported operand variants:
5092    ///
5093    /// ```text
5094    /// +---+---------------+
5095    /// | # | Operands      |
5096    /// +---+---------------+
5097    /// | 1 | Xmm, Xmm, Mem |
5098    /// | 2 | Xmm, Xmm, Xmm |
5099    /// | 3 | Ymm, Ymm, Mem |
5100    /// | 4 | Ymm, Ymm, Ymm |
5101    /// +---+---------------+
5102    /// ```
5103    #[inline]
5104    pub fn vphsubd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5105    where Assembler<'a>: VphsubdEmitter<A, B, C> {
5106        <Self as VphsubdEmitter<A, B, C>>::vphsubd(self, op0, op1, op2);
5107    }
5108    /// `VPHSUBSW` (VPHSUBSW). 
5109    /// (V)PHSUBSW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands. The signed, saturated 16-bit results are packed to the destination operand (first operand). When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
5110    ///
5111    ///
5112    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBSW.html).
5113    ///
5114    /// Supported operand variants:
5115    ///
5116    /// ```text
5117    /// +---+---------------+
5118    /// | # | Operands      |
5119    /// +---+---------------+
5120    /// | 1 | Xmm, Xmm, Mem |
5121    /// | 2 | Xmm, Xmm, Xmm |
5122    /// | 3 | Ymm, Ymm, Mem |
5123    /// | 4 | Ymm, Ymm, Ymm |
5124    /// +---+---------------+
5125    /// ```
5126    #[inline]
5127    pub fn vphsubsw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5128    where Assembler<'a>: VphsubswEmitter<A, B, C> {
5129        <Self as VphsubswEmitter<A, B, C>>::vphsubsw(self, op0, op1, op2);
5130    }
5131    /// `VPHSUBW` (VPHSUBW). 
5132    /// (V)PHSUBW performs horizontal subtraction on each adjacent pair of 16-bit signed integers by subtracting the most significant word from the least significant word of each pair in the source and destination operands, and packs the signed 16-bit results to the destination operand (first operand). (V)PHSUBD performs horizontal subtraction on each adjacent pair of 32-bit signed integers by subtracting the most significant doubleword from the least significant doubleword of each pair, and packs the signed 32-bit result to the destination operand. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.
5133    ///
5134    ///
5135    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PHSUBW%3APHSUBD.html).
5136    ///
5137    /// Supported operand variants:
5138    ///
5139    /// ```text
5140    /// +---+---------------+
5141    /// | # | Operands      |
5142    /// +---+---------------+
5143    /// | 1 | Xmm, Xmm, Mem |
5144    /// | 2 | Xmm, Xmm, Xmm |
5145    /// | 3 | Ymm, Ymm, Mem |
5146    /// | 4 | Ymm, Ymm, Ymm |
5147    /// +---+---------------+
5148    /// ```
5149    #[inline]
5150    pub fn vphsubw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5151    where Assembler<'a>: VphsubwEmitter<A, B, C> {
5152        <Self as VphsubwEmitter<A, B, C>>::vphsubw(self, op0, op1, op2);
5153    }
5154    /// `VPINSRD` (VPINSRD). 
5155    /// Copies a byte/dword/qword from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other elements in the destination register are left untouched.) The source operand can be a general-purpose register or a memory location. (When the source operand is a general-purpose register, PINSRB copies the low byte of the register.) The destination operand is an XMM register. The count operand is an 8-bit immediate. When specifying a qword[dword, byte] location in an XMM register, the [2, 4] least-significant bit(s) of the count operand specify the location.
5156    ///
5157    ///
5158    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRB%3APINSRD%3APINSRQ.html).
5159    ///
5160    /// Supported operand variants:
5161    ///
5162    /// ```text
5163    /// +---+--------------------+
5164    /// | # | Operands           |
5165    /// +---+--------------------+
5166    /// | 1 | Xmm, Xmm, Gpd, Imm |
5167    /// | 2 | Xmm, Xmm, Mem, Imm |
5168    /// +---+--------------------+
5169    /// ```
5170    #[inline]
5171    pub fn vpinsrd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
5172    where Assembler<'a>: VpinsrdEmitter<A, B, C, D> {
5173        <Self as VpinsrdEmitter<A, B, C, D>>::vpinsrd(self, op0, op1, op2, op3);
5174    }
5175    /// `VPINSRQ` (VPINSRQ). 
5176    /// Copies a byte/dword/qword from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other elements in the destination register are left untouched.) The source operand can be a general-purpose register or a memory location. (When the source operand is a general-purpose register, PINSRB copies the low byte of the register.) The destination operand is an XMM register. The count operand is an 8-bit immediate. When specifying a qword[dword, byte] location in an XMM register, the [2, 4] least-significant bit(s) of the count operand specify the location.
5177    ///
5178    ///
5179    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRB%3APINSRD%3APINSRQ.html).
5180    ///
5181    /// Supported operand variants:
5182    ///
5183    /// ```text
5184    /// +---+--------------------+
5185    /// | # | Operands           |
5186    /// +---+--------------------+
5187    /// | 1 | Xmm, Xmm, Gpd, Imm |
5188    /// | 2 | Xmm, Xmm, Gpq, Imm |
5189    /// | 3 | Xmm, Xmm, Mem, Imm |
5190    /// +---+--------------------+
5191    /// ```
5192    #[inline]
5193    pub fn vpinsrq<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
5194    where Assembler<'a>: VpinsrqEmitter<A, B, C, D> {
5195        <Self as VpinsrqEmitter<A, B, C, D>>::vpinsrq(self, op0, op1, op2, op3);
5196    }
5197    /// `VPMOVMSKB` (VPMOVMSKB). 
5198    /// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
5199    ///
5200    ///
5201    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
5202    ///
5203    /// Supported operand variants:
5204    ///
5205    /// ```text
5206    /// +---+----------+
5207    /// | # | Operands |
5208    /// +---+----------+
5209    /// | 1 | Gpd, Xmm |
5210    /// | 2 | Gpd, Ymm |
5211    /// +---+----------+
5212    /// ```
5213    #[inline]
5214    pub fn vpmovmskb<A, B>(&mut self, op0: A, op1: B)
5215    where Assembler<'a>: VpmovmskbEmitter<A, B> {
5216        <Self as VpmovmskbEmitter<A, B>>::vpmovmskb(self, op0, op1);
5217    }
5218    /// `VPOR` (VPOR). 
5219    /// Performs a bitwise logical OR operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is set to 1 if either or both of the corresponding bits of the first and second operands are 1; otherwise, it is set to 0.
5220    ///
5221    ///
5222    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/POR.html).
5223    ///
5224    /// Supported operand variants:
5225    ///
5226    /// ```text
5227    /// +---+---------------+
5228    /// | # | Operands      |
5229    /// +---+---------------+
5230    /// | 1 | Xmm, Xmm, Mem |
5231    /// | 2 | Xmm, Xmm, Xmm |
5232    /// | 3 | Ymm, Ymm, Mem |
5233    /// | 4 | Ymm, Ymm, Ymm |
5234    /// +---+---------------+
5235    /// ```
5236    #[inline]
5237    pub fn vpor<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5238    where Assembler<'a>: VporEmitter<A, B, C> {
5239        <Self as VporEmitter<A, B, C>>::vpor(self, op0, op1, op2);
5240    }
5241    /// `VPSIGNB` (VPSIGNB). 
5242    /// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
5243    ///
5244    ///
5245    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
5246    ///
5247    /// Supported operand variants:
5248    ///
5249    /// ```text
5250    /// +---+---------------+
5251    /// | # | Operands      |
5252    /// +---+---------------+
5253    /// | 1 | Xmm, Xmm, Mem |
5254    /// | 2 | Xmm, Xmm, Xmm |
5255    /// | 3 | Ymm, Ymm, Mem |
5256    /// | 4 | Ymm, Ymm, Ymm |
5257    /// +---+---------------+
5258    /// ```
5259    #[inline]
5260    pub fn vpsignb<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5261    where Assembler<'a>: VpsignbEmitter<A, B, C> {
5262        <Self as VpsignbEmitter<A, B, C>>::vpsignb(self, op0, op1, op2);
5263    }
5264    /// `VPSIGND` (VPSIGND). 
5265    /// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
5266    ///
5267    ///
5268    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
5269    ///
5270    /// Supported operand variants:
5271    ///
5272    /// ```text
5273    /// +---+---------------+
5274    /// | # | Operands      |
5275    /// +---+---------------+
5276    /// | 1 | Xmm, Xmm, Mem |
5277    /// | 2 | Xmm, Xmm, Xmm |
5278    /// | 3 | Ymm, Ymm, Mem |
5279    /// | 4 | Ymm, Ymm, Ymm |
5280    /// +---+---------------+
5281    /// ```
5282    #[inline]
5283    pub fn vpsignd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5284    where Assembler<'a>: VpsigndEmitter<A, B, C> {
5285        <Self as VpsigndEmitter<A, B, C>>::vpsignd(self, op0, op1, op2);
5286    }
5287    /// `VPSIGNW` (VPSIGNW). 
5288    /// (V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.
5289    ///
5290    ///
5291    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html).
5292    ///
5293    /// Supported operand variants:
5294    ///
5295    /// ```text
5296    /// +---+---------------+
5297    /// | # | Operands      |
5298    /// +---+---------------+
5299    /// | 1 | Xmm, Xmm, Mem |
5300    /// | 2 | Xmm, Xmm, Xmm |
5301    /// | 3 | Ymm, Ymm, Mem |
5302    /// | 4 | Ymm, Ymm, Ymm |
5303    /// +---+---------------+
5304    /// ```
5305    #[inline]
5306    pub fn vpsignw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5307    where Assembler<'a>: VpsignwEmitter<A, B, C> {
5308        <Self as VpsignwEmitter<A, B, C>>::vpsignw(self, op0, op1, op2);
5309    }
5310    /// `VPTEST` (VPTEST). 
5311    /// PTEST and VPTEST set the ZF flag if all bits in the result are 0 of the bitwise AND of the first source operand (first operand) and the second source operand (second operand). VPTEST sets the CF flag if all bits in the result are 0 of the bitwise AND of the second source operand (second operand) and the logical NOT of the destination operand.
5312    ///
5313    ///
5314    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PTEST.html).
5315    ///
5316    /// Supported operand variants:
5317    ///
5318    /// ```text
5319    /// +---+----------+
5320    /// | # | Operands |
5321    /// +---+----------+
5322    /// | 1 | Xmm, Mem |
5323    /// | 2 | Xmm, Xmm |
5324    /// | 3 | Ymm, Mem |
5325    /// | 4 | Ymm, Ymm |
5326    /// +---+----------+
5327    /// ```
5328    #[inline]
5329    pub fn vptest<A, B>(&mut self, op0: A, op1: B)
5330    where Assembler<'a>: VptestEmitter<A, B> {
5331        <Self as VptestEmitter<A, B>>::vptest(self, op0, op1);
5332    }
5333    /// `VPXOR` (VPXOR). 
5334    /// Performs a bitwise logical exclusive-OR (XOR) operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is 1 if the corresponding bits of the two operands are different; each bit is 0 if the corresponding bits of the operands are the same.
5335    ///
5336    ///
5337    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PXOR.html).
5338    ///
5339    /// Supported operand variants:
5340    ///
5341    /// ```text
5342    /// +---+---------------+
5343    /// | # | Operands      |
5344    /// +---+---------------+
5345    /// | 1 | Xmm, Xmm, Mem |
5346    /// | 2 | Xmm, Xmm, Xmm |
5347    /// | 3 | Ymm, Ymm, Mem |
5348    /// | 4 | Ymm, Ymm, Ymm |
5349    /// +---+---------------+
5350    /// ```
5351    #[inline]
5352    pub fn vpxor<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5353    where Assembler<'a>: VpxorEmitter<A, B, C> {
5354        <Self as VpxorEmitter<A, B, C>>::vpxor(self, op0, op1, op2);
5355    }
5356    /// `VRCPPS` (VRCPPS). 
5357    /// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
5358    ///
5359    ///
5360    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
5361    ///
5362    /// Supported operand variants:
5363    ///
5364    /// ```text
5365    /// +---+----------+
5366    /// | # | Operands |
5367    /// +---+----------+
5368    /// | 1 | Xmm, Mem |
5369    /// | 2 | Xmm, Xmm |
5370    /// | 3 | Ymm, Mem |
5371    /// | 4 | Ymm, Ymm |
5372    /// +---+----------+
5373    /// ```
5374    #[inline]
5375    pub fn vrcpps<A, B>(&mut self, op0: A, op1: B)
5376    where Assembler<'a>: VrcppsEmitter<A, B> {
5377        <Self as VrcppsEmitter<A, B>>::vrcpps(self, op0, op1);
5378    }
5379    /// `VRCPSS` (VRCPSS). 
5380    /// Computes of an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
5381    ///
5382    ///
5383    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
5384    ///
5385    /// Supported operand variants:
5386    ///
5387    /// ```text
5388    /// +---+---------------+
5389    /// | # | Operands      |
5390    /// +---+---------------+
5391    /// | 1 | Xmm, Xmm, Mem |
5392    /// | 2 | Xmm, Xmm, Xmm |
5393    /// +---+---------------+
5394    /// ```
5395    #[inline]
5396    pub fn vrcpss<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5397    where Assembler<'a>: VrcpssEmitter<A, B, C> {
5398        <Self as VrcpssEmitter<A, B, C>>::vrcpss(self, op0, op1, op2);
5399    }
5400    /// `VROUNDPD` (VROUNDPD). 
5401    /// Round the 2 double precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double precision floating-point value.
5402    ///
5403    ///
5404    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDPD.html).
5405    ///
5406    /// Supported operand variants:
5407    ///
5408    /// ```text
5409    /// +---+---------------+
5410    /// | # | Operands      |
5411    /// +---+---------------+
5412    /// | 1 | Xmm, Mem, Imm |
5413    /// | 2 | Xmm, Xmm, Imm |
5414    /// | 3 | Ymm, Mem, Imm |
5415    /// | 4 | Ymm, Ymm, Imm |
5416    /// +---+---------------+
5417    /// ```
5418    #[inline]
5419    pub fn vroundpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5420    where Assembler<'a>: VroundpdEmitter<A, B, C> {
5421        <Self as VroundpdEmitter<A, B, C>>::vroundpd(self, op0, op1, op2);
5422    }
5423    /// `VROUNDPS` (VROUNDPS). 
5424    /// Round the 4 single precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single precision floating-point value.
5425    ///
5426    ///
5427    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDPS.html).
5428    ///
5429    /// Supported operand variants:
5430    ///
5431    /// ```text
5432    /// +---+---------------+
5433    /// | # | Operands      |
5434    /// +---+---------------+
5435    /// | 1 | Xmm, Mem, Imm |
5436    /// | 2 | Xmm, Xmm, Imm |
5437    /// | 3 | Ymm, Mem, Imm |
5438    /// | 4 | Ymm, Ymm, Imm |
5439    /// +---+---------------+
5440    /// ```
5441    #[inline]
5442    pub fn vroundps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5443    where Assembler<'a>: VroundpsEmitter<A, B, C> {
5444        <Self as VroundpsEmitter<A, B, C>>::vroundps(self, op0, op1, op2);
5445    }
5446    /// `VROUNDSD` (VROUNDSD). 
5447    /// Round the double precision floating-point value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.
5448    ///
5449    ///
5450    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDSD.html).
5451    ///
5452    /// Supported operand variants:
5453    ///
5454    /// ```text
5455    /// +---+--------------------+
5456    /// | # | Operands           |
5457    /// +---+--------------------+
5458    /// | 1 | Xmm, Xmm, Mem, Imm |
5459    /// | 2 | Xmm, Xmm, Xmm, Imm |
5460    /// +---+--------------------+
5461    /// ```
5462    #[inline]
5463    pub fn vroundsd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
5464    where Assembler<'a>: VroundsdEmitter<A, B, C, D> {
5465        <Self as VroundsdEmitter<A, B, C, D>>::vroundsd(self, op0, op1, op2, op3);
5466    }
5467    /// `VROUNDSS` (VROUNDSS). 
5468    /// Round the single precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single precision floating-point input to an integer value and returns the result as a single precision floating-point value in the lowest position. The upper three single precision floating-point values in the destination are retained.
5469    ///
5470    ///
5471    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ROUNDSS.html).
5472    ///
5473    /// Supported operand variants:
5474    ///
5475    /// ```text
5476    /// +---+--------------------+
5477    /// | # | Operands           |
5478    /// +---+--------------------+
5479    /// | 1 | Xmm, Xmm, Mem, Imm |
5480    /// | 2 | Xmm, Xmm, Xmm, Imm |
5481    /// +---+--------------------+
5482    /// ```
5483    #[inline]
5484    pub fn vroundss<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
5485    where Assembler<'a>: VroundssEmitter<A, B, C, D> {
5486        <Self as VroundssEmitter<A, B, C, D>>::vroundss(self, op0, op1, op2, op3);
5487    }
5488    /// `VRSQRTPS` (VRSQRTPS). 
5489    /// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
5490    ///
5491    ///
5492    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
5493    ///
5494    /// Supported operand variants:
5495    ///
5496    /// ```text
5497    /// +---+----------+
5498    /// | # | Operands |
5499    /// +---+----------+
5500    /// | 1 | Xmm, Mem |
5501    /// | 2 | Xmm, Xmm |
5502    /// | 3 | Ymm, Mem |
5503    /// | 4 | Ymm, Ymm |
5504    /// +---+----------+
5505    /// ```
5506    #[inline]
5507    pub fn vrsqrtps<A, B>(&mut self, op0: A, op1: B)
5508    where Assembler<'a>: VrsqrtpsEmitter<A, B> {
5509        <Self as VrsqrtpsEmitter<A, B>>::vrsqrtps(self, op0, op1);
5510    }
5511    /// `VRSQRTSS` (VRSQRTSS). 
5512    /// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
5513    ///
5514    ///
5515    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
5516    ///
5517    /// Supported operand variants:
5518    ///
5519    /// ```text
5520    /// +---+---------------+
5521    /// | # | Operands      |
5522    /// +---+---------------+
5523    /// | 1 | Xmm, Xmm, Mem |
5524    /// | 2 | Xmm, Xmm, Xmm |
5525    /// +---+---------------+
5526    /// ```
5527    #[inline]
5528    pub fn vrsqrtss<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5529    where Assembler<'a>: VrsqrtssEmitter<A, B, C> {
5530        <Self as VrsqrtssEmitter<A, B, C>>::vrsqrtss(self, op0, op1, op2);
5531    }
5532    /// `VSTMXCSR` (VSTMXCSR). 
5533    /// Stores the contents of the MXCSR control and status register to the destination operand. The destination operand is a 32-bit memory location. The reserved bits in the MXCSR register are stored as 0s.
5534    ///
5535    ///
5536    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
5537    ///
5538    /// Supported operand variants:
5539    ///
5540    /// ```text
5541    /// +---+----------+
5542    /// | # | Operands |
5543    /// +---+----------+
5544    /// | 1 | Mem      |
5545    /// +---+----------+
5546    /// ```
5547    #[inline]
5548    pub fn vstmxcsr<A>(&mut self, op0: A)
5549    where Assembler<'a>: VstmxcsrEmitter<A> {
5550        <Self as VstmxcsrEmitter<A>>::vstmxcsr(self, op0);
5551    }
5552    /// `VTESTPD` (VTESTPD). 
5553    /// VTESTPS performs a bitwise comparison of all the sign bits of the packed single-precision elements in the first source operation and corresponding sign bits in the second source operand. If the AND of the source sign bits with the dest sign bits produces all zeros, the ZF is set else the ZF is clear. If the AND of the source sign bits with the inverted dest sign bits produces all zeros the CF is set else the CF is clear. An attempt to execute VTESTPS with VEX.W=1 will cause #UD.
5554    ///
5555    ///
5556    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VTESTPD%3AVTESTPS.html).
5557    ///
5558    /// Supported operand variants:
5559    ///
5560    /// ```text
5561    /// +---+----------+
5562    /// | # | Operands |
5563    /// +---+----------+
5564    /// | 1 | Xmm, Mem |
5565    /// | 2 | Xmm, Xmm |
5566    /// | 3 | Ymm, Mem |
5567    /// | 4 | Ymm, Ymm |
5568    /// +---+----------+
5569    /// ```
5570    #[inline]
5571    pub fn vtestpd<A, B>(&mut self, op0: A, op1: B)
5572    where Assembler<'a>: VtestpdEmitter<A, B> {
5573        <Self as VtestpdEmitter<A, B>>::vtestpd(self, op0, op1);
5574    }
5575    /// `VTESTPS` (VTESTPS). 
5576    /// VTESTPS performs a bitwise comparison of all the sign bits of the packed single-precision elements in the first source operation and corresponding sign bits in the second source operand. If the AND of the source sign bits with the dest sign bits produces all zeros, the ZF is set else the ZF is clear. If the AND of the source sign bits with the inverted dest sign bits produces all zeros the CF is set else the CF is clear. An attempt to execute VTESTPS with VEX.W=1 will cause #UD.
5577    ///
5578    ///
5579    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VTESTPD%3AVTESTPS.html).
5580    ///
5581    /// Supported operand variants:
5582    ///
5583    /// ```text
5584    /// +---+----------+
5585    /// | # | Operands |
5586    /// +---+----------+
5587    /// | 1 | Xmm, Mem |
5588    /// | 2 | Xmm, Xmm |
5589    /// | 3 | Ymm, Mem |
5590    /// | 4 | Ymm, Ymm |
5591    /// +---+----------+
5592    /// ```
5593    #[inline]
5594    pub fn vtestps<A, B>(&mut self, op0: A, op1: B)
5595    where Assembler<'a>: VtestpsEmitter<A, B> {
5596        <Self as VtestpsEmitter<A, B>>::vtestps(self, op0, op1);
5597    }
5598    /// `VZEROALL` (VZEROALL). 
5599    /// In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.
5600    ///
5601    ///
5602    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VZEROALL.html).
5603    ///
5604    /// Supported operand variants:
5605    ///
5606    /// ```text
5607    /// +---+----------+
5608    /// | # | Operands |
5609    /// +---+----------+
5610    /// | 1 | (none)   |
5611    /// +---+----------+
5612    /// ```
5613    #[inline]
5614    pub fn vzeroall(&mut self)
5615    where Assembler<'a>: VzeroallEmitter {
5616        <Self as VzeroallEmitter>::vzeroall(self);
5617    }
5618    /// `VZEROUPPER` (VZEROUPPER). 
5619    /// In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.
5620    ///
5621    ///
5622    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VZEROUPPER.html).
5623    ///
5624    /// Supported operand variants:
5625    ///
5626    /// ```text
5627    /// +---+----------+
5628    /// | # | Operands |
5629    /// +---+----------+
5630    /// | 1 | (none)   |
5631    /// +---+----------+
5632    /// ```
5633    #[inline]
5634    pub fn vzeroupper(&mut self)
5635    where Assembler<'a>: VzeroupperEmitter {
5636        <Self as VzeroupperEmitter>::vzeroupper(self);
5637    }
5638}