asmkit/x86/features/
SSE2.rs

1use crate::x86::assembler::*;
2use crate::x86::operands::*;
3use super::super::opcodes::*;
4use crate::core::emitter::*;
5use crate::core::operand::*;
6
/// A dummy operand that represents no register, built with `Operand::new()`.
///
/// The emitters in this file pass `&NOREG` to `emit` for every operand slot an
/// instruction does not use, so each call always supplies four operands.
const NOREG: Operand = Operand::new();
9
/// `LFENCE` (LFENCE).
/// Performs a serializing operation on all load-from-memory instructions that
/// were issued prior to the LFENCE instruction.
///
/// As described by the Intel manual:
///
/// - LFENCE does not execute until all prior instructions have completed
///   locally, and no later instruction begins execution until LFENCE completes.
/// - An instruction that loads from memory and precedes an LFENCE receives its
///   data from memory prior to completion of the LFENCE.
/// - An LFENCE that follows an instruction that stores to memory might complete
///   before the data being stored have become globally visible.
/// - Instructions following an LFENCE may be fetched from memory before the
///   LFENCE, but they will not execute (even speculatively) until the LFENCE
///   completes.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LFENCE.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | (none)   |
/// +---+----------+
/// ```
pub trait LfenceEmitter {
    /// Encodes one `LFENCE`; the instruction takes no operands.
    fn lfence(&mut self);
}
28
29impl<'a> LfenceEmitter for Assembler<'a> {
30    fn lfence(&mut self) {
31        self.emit(LFENCE, &NOREG, &NOREG, &NOREG, &NOREG);
32    }
33}
34
/// `MFENCE` (MFENCE).
/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to the MFENCE instruction. This
/// guarantees that every load and store instruction that precedes the MFENCE in
/// program order becomes globally visible before any load or store instruction
/// that follows it.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MFENCE.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | (none)   |
/// +---+----------+
/// ```
pub trait MfenceEmitter {
    /// Encodes one `MFENCE`; the instruction takes no operands.
    fn mfence(&mut self);
}
49
50impl<'a> MfenceEmitter for Assembler<'a> {
51    fn mfence(&mut self) {
52        self.emit(MFENCE, &NOREG, &NOREG, &NOREG, &NOREG);
53    }
54}
55
/// `MMX_CVTPD2PI` (CVTPD2PI).
/// Takes the two packed double precision floating-point values of the source
/// (second operand) and converts them to two packed signed doubleword integers
/// stored in the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPD2PI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Xmm  |
/// +---+----------+
/// ```
pub trait MmxCvtpd2piEmitter<Dst, Src> {
    /// Encodes `CVTPD2PI`; `op0` is the destination, `op1` the source.
    fn mmx_cvtpd2pi(&mut self, op0: Dst, op1: Src);
}
75
76impl<'a> MmxCvtpd2piEmitter<Mm, Xmm> for Assembler<'a> {
77    fn mmx_cvtpd2pi(&mut self, op0: Mm, op1: Xmm) {
78        self.emit(MMX_CVTPD2PIRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
79    }
80}
81
82impl<'a> MmxCvtpd2piEmitter<Mm, Mem> for Assembler<'a> {
83    fn mmx_cvtpd2pi(&mut self, op0: Mm, op1: Mem) {
84        self.emit(MMX_CVTPD2PIRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
85    }
86}
87
/// `MMX_CVTPI2PD` (CVTPI2PD).
/// Takes the two packed signed doubleword integers of the source (second
/// operand) and converts them to two packed double precision floating-point
/// values stored in the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPI2PD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Mm  |
/// +---+----------+
/// ```
pub trait MmxCvtpi2pdEmitter<Dst, Src> {
    /// Encodes `CVTPI2PD`; `op0` is the destination, `op1` the source.
    fn mmx_cvtpi2pd(&mut self, op0: Dst, op1: Src);
}
107
108impl<'a> MmxCvtpi2pdEmitter<Xmm, Mm> for Assembler<'a> {
109    fn mmx_cvtpi2pd(&mut self, op0: Xmm, op1: Mm) {
110        self.emit(MMX_CVTPI2PDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
111    }
112}
113
114impl<'a> MmxCvtpi2pdEmitter<Xmm, Mem> for Assembler<'a> {
115    fn mmx_cvtpi2pd(&mut self, op0: Xmm, op1: Mem) {
116        self.emit(MMX_CVTPI2PDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
117    }
118}
119
/// `MMX_CVTPI2PS` (CVTPI2PS).
/// Takes the two packed signed doubleword integers of the source (second
/// operand) and converts them to two packed single precision floating-point
/// values stored in the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPI2PS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Mm  |
/// +---+----------+
/// ```
pub trait MmxCvtpi2psEmitter<Dst, Src> {
    /// Encodes `CVTPI2PS`; `op0` is the destination, `op1` the source.
    fn mmx_cvtpi2ps(&mut self, op0: Dst, op1: Src);
}
139
140impl<'a> MmxCvtpi2psEmitter<Xmm, Mm> for Assembler<'a> {
141    fn mmx_cvtpi2ps(&mut self, op0: Xmm, op1: Mm) {
142        self.emit(MMX_CVTPI2PSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
143    }
144}
145
146impl<'a> MmxCvtpi2psEmitter<Xmm, Mem> for Assembler<'a> {
147    fn mmx_cvtpi2ps(&mut self, op0: Xmm, op1: Mem) {
148        self.emit(MMX_CVTPI2PSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
149    }
150}
151
/// `MMX_CVTPS2PI` (CVTPS2PI).
/// Takes the two packed single precision floating-point values of the source
/// (second operand) and converts them to two packed signed doubleword integers
/// stored in the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPS2PI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Xmm  |
/// +---+----------+
/// ```
pub trait MmxCvtps2piEmitter<Dst, Src> {
    /// Encodes `CVTPS2PI`; `op0` is the destination, `op1` the source.
    fn mmx_cvtps2pi(&mut self, op0: Dst, op1: Src);
}
171
172impl<'a> MmxCvtps2piEmitter<Mm, Xmm> for Assembler<'a> {
173    fn mmx_cvtps2pi(&mut self, op0: Mm, op1: Xmm) {
174        self.emit(MMX_CVTPS2PIRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
175    }
176}
177
178impl<'a> MmxCvtps2piEmitter<Mm, Mem> for Assembler<'a> {
179    fn mmx_cvtps2pi(&mut self, op0: Mm, op1: Mem) {
180        self.emit(MMX_CVTPS2PIRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
181    }
182}
183
/// `MMX_CVTTPD2PI` (CVTTPD2PI).
/// Takes the two packed double precision floating-point values of the source
/// (second operand) and converts them to two packed signed doubleword integers
/// stored in the destination (first operand). The source is either an XMM
/// register or a 128-bit memory location; the destination is an MMX technology
/// register.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTTPD2PI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Xmm  |
/// +---+----------+
/// ```
pub trait MmxCvttpd2piEmitter<Dst, Src> {
    /// Encodes `CVTTPD2PI`; `op0` is the destination, `op1` the source.
    fn mmx_cvttpd2pi(&mut self, op0: Dst, op1: Src);
}
203
204impl<'a> MmxCvttpd2piEmitter<Mm, Xmm> for Assembler<'a> {
205    fn mmx_cvttpd2pi(&mut self, op0: Mm, op1: Xmm) {
206        self.emit(MMX_CVTTPD2PIRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
207    }
208}
209
210impl<'a> MmxCvttpd2piEmitter<Mm, Mem> for Assembler<'a> {
211    fn mmx_cvttpd2pi(&mut self, op0: Mm, op1: Mem) {
212        self.emit(MMX_CVTTPD2PIRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
213    }
214}
215
/// `MMX_CVTTPS2PI` (CVTTPS2PI).
/// Takes the two packed single precision floating-point values of the source
/// (second operand) and converts them to two packed signed doubleword integers
/// stored in the destination (first operand). The source is either an XMM
/// register or a 64-bit memory location; the destination is an MMX technology
/// register. With an XMM source, the two single precision values are read from
/// the low quadword of the register.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTTPS2PI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Xmm  |
/// +---+----------+
/// ```
pub trait MmxCvttps2piEmitter<Dst, Src> {
    /// Encodes `CVTTPS2PI`; `op0` is the destination, `op1` the source.
    fn mmx_cvttps2pi(&mut self, op0: Dst, op1: Src);
}
235
236impl<'a> MmxCvttps2piEmitter<Mm, Xmm> for Assembler<'a> {
237    fn mmx_cvttps2pi(&mut self, op0: Mm, op1: Xmm) {
238        self.emit(MMX_CVTTPS2PIRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
239    }
240}
241
242impl<'a> MmxCvttps2piEmitter<Mm, Mem> for Assembler<'a> {
243    fn mmx_cvttps2pi(&mut self, op0: Mm, op1: Mem) {
244        self.emit(MMX_CVTTPS2PIRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
245    }
246}
247
/// `MOVNTI` (MOVNTI).
/// Stores the doubleword integer held in the source (second operand) to the
/// destination (first operand), using a non-temporal hint to minimize cache
/// pollution during the write to memory. The source is a general-purpose
/// register and the destination is a 32-bit memory location; the table below
/// also lists a variant whose source is a `Gpq` register.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/MOVNTI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem, Gpd |
/// | 2 | Mem, Gpq |
/// +---+----------+
/// ```
pub trait MovntiEmitter<Dst, Src> {
    /// Encodes `MOVNTI`; `op0` is the memory destination, `op1` the register source.
    fn movnti(&mut self, op0: Dst, op1: Src);
}
267
268impl<'a> MovntiEmitter<Mem, Gpd> for Assembler<'a> {
269    fn movnti(&mut self, op0: Mem, op1: Gpd) {
270        self.emit(MOVNTI32MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
271    }
272}
273
274impl<'a> MovntiEmitter<Mem, Gpq> for Assembler<'a> {
275    fn movnti(&mut self, op0: Mem, op1: Gpq) {
276        self.emit(MOVNTI64MR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
277    }
278}
279
/// `SSE_ADDPD` (ADDPD).
/// Adds the packed double precision floating-point values (two, four or eight
/// of them) of the first source operand to those of the second source operand
/// and stores the packed double precision result in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/ADDPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAddpdEmitter<Dst, Src> {
    /// Encodes `ADDPD` with `op0` as first operand and `op1` as second operand.
    fn sse_addpd(&mut self, op0: Dst, op1: Src);
}
299
300impl<'a> SseAddpdEmitter<Xmm, Xmm> for Assembler<'a> {
301    fn sse_addpd(&mut self, op0: Xmm, op1: Xmm) {
302        self.emit(SSE_ADDPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
303    }
304}
305
306impl<'a> SseAddpdEmitter<Xmm, Mem> for Assembler<'a> {
307    fn sse_addpd(&mut self, op0: Xmm, op1: Mem) {
308        self.emit(SSE_ADDPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
309    }
310}
311
/// `SSE_ADDSD` (ADDSD).
/// Adds the low double precision floating-point values of the second source
/// operand and the first source operand, storing the double precision result
/// in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/ADDSD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAddsdEmitter<Dst, Src> {
    /// Encodes `ADDSD` with `op0` as first operand and `op1` as second operand.
    fn sse_addsd(&mut self, op0: Dst, op1: Src);
}
331
332impl<'a> SseAddsdEmitter<Xmm, Xmm> for Assembler<'a> {
333    fn sse_addsd(&mut self, op0: Xmm, op1: Xmm) {
334        self.emit(SSE_ADDSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
335    }
336}
337
338impl<'a> SseAddsdEmitter<Xmm, Mem> for Assembler<'a> {
339    fn sse_addsd(&mut self, op0: Xmm, op1: Mem) {
340        self.emit(SSE_ADDSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
341    }
342}
343
/// `SSE_ANDNPD` (ANDNPD).
/// Computes a bitwise logical AND NOT over the packed double precision
/// floating-point values (two, four or eight of them) of the first and second
/// source operands and stores the result in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/ANDNPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAndnpdEmitter<Dst, Src> {
    /// Encodes `ANDNPD` with `op0` as first operand and `op1` as second operand.
    fn sse_andnpd(&mut self, op0: Dst, op1: Src);
}
363
364impl<'a> SseAndnpdEmitter<Xmm, Xmm> for Assembler<'a> {
365    fn sse_andnpd(&mut self, op0: Xmm, op1: Xmm) {
366        self.emit(SSE_ANDNPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
367    }
368}
369
370impl<'a> SseAndnpdEmitter<Xmm, Mem> for Assembler<'a> {
371    fn sse_andnpd(&mut self, op0: Xmm, op1: Mem) {
372        self.emit(SSE_ANDNPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
373    }
374}
375
/// `SSE_ANDPD` (ANDPD).
/// Computes a bitwise logical AND over the packed double precision
/// floating-point values (two, four or eight of them) of the first and second
/// source operands and stores the result in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/ANDPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAndpdEmitter<Dst, Src> {
    /// Encodes `ANDPD` with `op0` as first operand and `op1` as second operand.
    fn sse_andpd(&mut self, op0: Dst, op1: Src);
}
395
396impl<'a> SseAndpdEmitter<Xmm, Xmm> for Assembler<'a> {
397    fn sse_andpd(&mut self, op0: Xmm, op1: Xmm) {
398        self.emit(SSE_ANDPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
399    }
400}
401
402impl<'a> SseAndpdEmitter<Xmm, Mem> for Assembler<'a> {
403    fn sse_andpd(&mut self, op0: Xmm, op1: Mem) {
404        self.emit(SSE_ANDPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
405    }
406}
407
/// `SSE_CMPPD` (CMPPD).
/// Runs a SIMD compare of the packed double precision floating-point values of
/// the second source operand and the first source operand, returning the
/// comparison result to the destination operand. The comparison predicate
/// operand (an immediate byte) selects which comparison is applied to each
/// pair of packed values.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CMPPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseCmppdEmitter<Dst, Src, Pred> {
    /// Encodes `CMPPD`; `op2` is the immediate comparison predicate.
    fn sse_cmppd(&mut self, op0: Dst, op1: Src, op2: Pred);
}
427
428impl<'a> SseCmppdEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
429    fn sse_cmppd(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
430        self.emit(SSE_CMPPDRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
431    }
432}
433
434impl<'a> SseCmppdEmitter<Xmm, Mem, Imm> for Assembler<'a> {
435    fn sse_cmppd(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
436        self.emit(SSE_CMPPDRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
437    }
438}
439
/// `SSE_CMPSD` (CMPSD).
/// Compares the low double precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed.
///
/// This is the SSE2 scalar floating-point compare, not the string instruction
/// CMPS/CMPSB/CMPSW/CMPSD/CMPSQ that shares the `CMPSD` mnemonic.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/cmpsd).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseCmpsdEmitter<A, B, C> {
    /// Encodes the scalar `CMPSD`; `op2` is the immediate comparison predicate.
    fn sse_cmpsd(&mut self, op0: A, op1: B, op2: C);
}
459
460impl<'a> SseCmpsdEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
461    fn sse_cmpsd(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
462        self.emit(SSE_CMPSDRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
463    }
464}
465
466impl<'a> SseCmpsdEmitter<Xmm, Mem, Imm> for Assembler<'a> {
467    fn sse_cmpsd(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
468        self.emit(SSE_CMPSDRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
469    }
470}
471
/// `SSE_COMISD` (COMISD).
/// Compares the double precision floating-point values held in the low
/// quadwords of operand 1 (first operand) and operand 2 (second operand) and
/// sets the ZF, PF, and CF flags in EFLAGS according to the outcome (unordered,
/// greater than, less than, or equal). The OF, SF, and AF flags are set to 0.
/// The unordered result is returned when either source operand is a NaN (QNaN
/// or SNaN).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/COMISD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseComisdEmitter<Lhs, Rhs> {
    /// Encodes `COMISD` comparing `op0` (operand 1) with `op1` (operand 2).
    fn sse_comisd(&mut self, op0: Lhs, op1: Rhs);
}
491
492impl<'a> SseComisdEmitter<Xmm, Xmm> for Assembler<'a> {
493    fn sse_comisd(&mut self, op0: Xmm, op1: Xmm) {
494        self.emit(SSE_COMISDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
495    }
496}
497
498impl<'a> SseComisdEmitter<Xmm, Mem> for Assembler<'a> {
499    fn sse_comisd(&mut self, op0: Xmm, op1: Mem) {
500        self.emit(SSE_COMISDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
501    }
502}
503
/// `SSE_CVTDQ2PD` (CVTDQ2PD).
/// Takes the packed signed doubleword integers (two, four or eight of them) of
/// the source (second operand) and converts them to the same number of packed
/// double precision floating-point values in the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTDQ2PD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtdq2pdEmitter<Dst, Src> {
    /// Encodes `CVTDQ2PD`; `op0` is the destination, `op1` the source.
    fn sse_cvtdq2pd(&mut self, op0: Dst, op1: Src);
}
523
524impl<'a> SseCvtdq2pdEmitter<Xmm, Xmm> for Assembler<'a> {
525    fn sse_cvtdq2pd(&mut self, op0: Xmm, op1: Xmm) {
526        self.emit(SSE_CVTDQ2PDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
527    }
528}
529
530impl<'a> SseCvtdq2pdEmitter<Xmm, Mem> for Assembler<'a> {
531    fn sse_cvtdq2pd(&mut self, op0: Xmm, op1: Mem) {
532        self.emit(SSE_CVTDQ2PDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
533    }
534}
535
/// `SSE_CVTDQ2PS` (CVTDQ2PS).
/// Takes the packed signed doubleword integers (four, eight or sixteen of
/// them) of the source operand and converts them to the same number of packed
/// single precision floating-point values in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTDQ2PS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtdq2psEmitter<Dst, Src> {
    /// Encodes `CVTDQ2PS` with `op0` as first operand and `op1` as second operand.
    fn sse_cvtdq2ps(&mut self, op0: Dst, op1: Src);
}
555
556impl<'a> SseCvtdq2psEmitter<Xmm, Xmm> for Assembler<'a> {
557    fn sse_cvtdq2ps(&mut self, op0: Xmm, op1: Xmm) {
558        self.emit(SSE_CVTDQ2PSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
559    }
560}
561
562impl<'a> SseCvtdq2psEmitter<Xmm, Mem> for Assembler<'a> {
563    fn sse_cvtdq2ps(&mut self, op0: Xmm, op1: Mem) {
564        self.emit(SSE_CVTDQ2PSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
565    }
566}
567
/// `SSE_CVTPD2DQ` (CVTPD2DQ).
/// Takes the packed double precision floating-point values of the source
/// (second operand) and converts them to packed signed doubleword integers in
/// the destination (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPD2DQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtpd2dqEmitter<Dst, Src> {
    /// Encodes `CVTPD2DQ`; `op0` is the destination, `op1` the source.
    fn sse_cvtpd2dq(&mut self, op0: Dst, op1: Src);
}
587
588impl<'a> SseCvtpd2dqEmitter<Xmm, Xmm> for Assembler<'a> {
589    fn sse_cvtpd2dq(&mut self, op0: Xmm, op1: Xmm) {
590        self.emit(SSE_CVTPD2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
591    }
592}
593
594impl<'a> SseCvtpd2dqEmitter<Xmm, Mem> for Assembler<'a> {
595    fn sse_cvtpd2dq(&mut self, op0: Xmm, op1: Mem) {
596        self.emit(SSE_CVTPD2DQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
597    }
598}
599
/// `SSE_CVTPD2PS` (CVTPD2PS).
/// Takes the packed double precision floating-point values (two, four or eight
/// of them) of the source (second operand) and converts them to the same
/// number of packed single precision floating-point values in the destination
/// (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPD2PS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtpd2psEmitter<Dst, Src> {
    /// Encodes `CVTPD2PS`; `op0` is the destination, `op1` the source.
    fn sse_cvtpd2ps(&mut self, op0: Dst, op1: Src);
}
619
620impl<'a> SseCvtpd2psEmitter<Xmm, Xmm> for Assembler<'a> {
621    fn sse_cvtpd2ps(&mut self, op0: Xmm, op1: Xmm) {
622        self.emit(SSE_CVTPD2PSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
623    }
624}
625
626impl<'a> SseCvtpd2psEmitter<Xmm, Mem> for Assembler<'a> {
627    fn sse_cvtpd2ps(&mut self, op0: Xmm, op1: Mem) {
628        self.emit(SSE_CVTPD2PSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
629    }
630}
631
/// `SSE_CVTPS2DQ` (CVTPS2DQ).
/// Takes the packed single precision floating-point values (four, eight or
/// sixteen of them) of the source operand and converts them to the same number
/// of signed doubleword integers in the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPS2DQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtps2dqEmitter<Dst, Src> {
    /// Encodes `CVTPS2DQ` with `op0` as first operand and `op1` as second operand.
    fn sse_cvtps2dq(&mut self, op0: Dst, op1: Src);
}
651
652impl<'a> SseCvtps2dqEmitter<Xmm, Xmm> for Assembler<'a> {
653    fn sse_cvtps2dq(&mut self, op0: Xmm, op1: Xmm) {
654        self.emit(SSE_CVTPS2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
655    }
656}
657
658impl<'a> SseCvtps2dqEmitter<Xmm, Mem> for Assembler<'a> {
659    fn sse_cvtps2dq(&mut self, op0: Xmm, op1: Mem) {
660        self.emit(SSE_CVTPS2DQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
661    }
662}
663
/// `SSE_CVTPS2PD` (CVTPS2PD).
/// Takes the packed single precision floating-point values (two, four or eight
/// of them) of the source (second operand) and converts them to the same
/// number of packed double precision floating-point values in the destination
/// (first operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTPS2PD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtps2pdEmitter<Dst, Src> {
    /// Encodes `CVTPS2PD`; `op0` is the destination, `op1` the source.
    fn sse_cvtps2pd(&mut self, op0: Dst, op1: Src);
}
683
684impl<'a> SseCvtps2pdEmitter<Xmm, Xmm> for Assembler<'a> {
685    fn sse_cvtps2pd(&mut self, op0: Xmm, op1: Xmm) {
686        self.emit(SSE_CVTPS2PDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
687    }
688}
689
690impl<'a> SseCvtps2pdEmitter<Xmm, Mem> for Assembler<'a> {
691    fn sse_cvtps2pd(&mut self, op0: Xmm, op1: Mem) {
692        self.emit(SSE_CVTPS2PDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
693    }
694}
695
/// `SSE_CVTSD2SI` (CVTSD2SI).
/// Takes the double precision floating-point value of the source (second
/// operand) and converts it to a signed double-word integer in the destination
/// (first operand). The source is either an XMM register or a 64-bit memory
/// location; the destination is a general-purpose register. With an XMM
/// source, the value is read from the low quadword of the register.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTSD2SI.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Gpd, Mem |
/// | 2 | Gpd, Xmm |
/// | 3 | Gpq, Mem |
/// | 4 | Gpq, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtsd2siEmitter<Dst, Src> {
    /// Encodes `CVTSD2SI`; `op0` is the destination, `op1` the source.
    fn sse_cvtsd2si(&mut self, op0: Dst, op1: Src);
}
717
718impl<'a> SseCvtsd2siEmitter<Gpd, Xmm> for Assembler<'a> {
719    fn sse_cvtsd2si(&mut self, op0: Gpd, op1: Xmm) {
720        self.emit(SSE_CVTSD2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
721    }
722}
723
724impl<'a> SseCvtsd2siEmitter<Gpd, Mem> for Assembler<'a> {
725    fn sse_cvtsd2si(&mut self, op0: Gpd, op1: Mem) {
726        self.emit(SSE_CVTSD2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
727    }
728}
729
730impl<'a> SseCvtsd2siEmitter<Gpq, Xmm> for Assembler<'a> {
731    fn sse_cvtsd2si(&mut self, op0: Gpq, op1: Xmm) {
732        self.emit(SSE_CVTSD2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
733    }
734}
735
736impl<'a> SseCvtsd2siEmitter<Gpq, Mem> for Assembler<'a> {
737    fn sse_cvtsd2si(&mut self, op0: Gpq, op1: Mem) {
738        self.emit(SSE_CVTSD2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
739    }
740}
741
/// `SSE_CVTSD2SS` (CVTSD2SS).
/// Takes the double precision floating-point value of the “convert-from”
/// source operand (the second operand in the SSE2 version, otherwise the third
/// operand) and converts it to a single precision floating-point value in the
/// destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTSD2SS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtsd2ssEmitter<Dst, Src> {
    /// Encodes `CVTSD2SS`; `op0` is the destination, `op1` the “convert-from” source.
    fn sse_cvtsd2ss(&mut self, op0: Dst, op1: Src);
}
761
762impl<'a> SseCvtsd2ssEmitter<Xmm, Xmm> for Assembler<'a> {
763    fn sse_cvtsd2ss(&mut self, op0: Xmm, op1: Xmm) {
764        self.emit(SSE_CVTSD2SSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
765    }
766}
767
768impl<'a> SseCvtsd2ssEmitter<Xmm, Mem> for Assembler<'a> {
769    fn sse_cvtsd2ss(&mut self, op0: Xmm, op1: Mem) {
770        self.emit(SSE_CVTSD2SSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
771    }
772}
773
/// `SSE_CVTSI2SD` (CVTSI2SD).
/// Takes the signed doubleword integer (or signed quadword integer when the
/// operand size is 64 bits) of the “convert-from” source operand and converts
/// it to a double precision floating-point value in the destination operand.
/// The result lands in the low quadword of the destination and the high
/// quadword is left unchanged. An inexact conversion is rounded according to
/// the rounding control bits in the MXCSR register.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Gpd |
/// | 2 | Xmm, Gpq |
/// | 3 | Xmm, Mem |
/// +---+----------+
/// ```
pub trait SseCvtsi2sdEmitter<Dst, Src> {
    /// Encodes `CVTSI2SD`; `op0` is the destination, `op1` the “convert-from” source.
    fn sse_cvtsi2sd(&mut self, op0: Dst, op1: Src);
}
794
795impl<'a> SseCvtsi2sdEmitter<Xmm, Gpd> for Assembler<'a> {
796    fn sse_cvtsi2sd(&mut self, op0: Xmm, op1: Gpd) {
797        self.emit(SSE_CVTSI2SD32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
798    }
799}
800
801impl<'a> SseCvtsi2sdEmitter<Xmm, Mem> for Assembler<'a> {
802    fn sse_cvtsi2sd(&mut self, op0: Xmm, op1: Mem) {
803        self.emit(SSE_CVTSI2SD32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
804    }
805}
806
807impl<'a> SseCvtsi2sdEmitter<Xmm, Gpq> for Assembler<'a> {
808    fn sse_cvtsi2sd(&mut self, op0: Xmm, op1: Gpq) {
809        self.emit(SSE_CVTSI2SD64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
810    }
811}
812
/// `SSE_CVTSS2SD` (CVTSS2SD).
/// Takes the single precision floating-point value of the “convert-from”
/// source operand and converts it to a double precision floating-point value
/// in the destination operand. With an XMM “convert-from” source, the single
/// precision value is read from the low doubleword of the register. The result
/// is stored in the low quadword of the destination operand.
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvtss2sdEmitter<Dst, Src> {
    /// Encodes `CVTSS2SD`; `op0` is the destination, `op1` the “convert-from” source.
    fn sse_cvtss2sd(&mut self, op0: Dst, op1: Src);
}
832
833impl<'a> SseCvtss2sdEmitter<Xmm, Xmm> for Assembler<'a> {
834    fn sse_cvtss2sd(&mut self, op0: Xmm, op1: Xmm) {
835        self.emit(SSE_CVTSS2SDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
836    }
837}
838
839impl<'a> SseCvtss2sdEmitter<Xmm, Mem> for Assembler<'a> {
840    fn sse_cvtss2sd(&mut self, op0: Xmm, op1: Mem) {
841        self.emit(SSE_CVTSS2SDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
842    }
843}
844
/// `SSE_CVTTPD2DQ` (CVTTPD2DQ).
/// Takes the packed double precision floating-point values (two, four or eight
/// of them) of the source (second operand) and converts them to the same
/// number of packed signed doubleword integers in the destination (first
/// operand).
///
///
/// Full instruction reference: [Intel manual](https://www.felixcloutier.com/x86/CVTTPD2DQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseCvttpd2dqEmitter<Dst, Src> {
    /// Encodes `CVTTPD2DQ`; `op0` is the destination, `op1` the source.
    fn sse_cvttpd2dq(&mut self, op0: Dst, op1: Src);
}
864
865impl<'a> SseCvttpd2dqEmitter<Xmm, Xmm> for Assembler<'a> {
866    fn sse_cvttpd2dq(&mut self, op0: Xmm, op1: Xmm) {
867        self.emit(SSE_CVTTPD2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
868    }
869}
870
871impl<'a> SseCvttpd2dqEmitter<Xmm, Mem> for Assembler<'a> {
872    fn sse_cvttpd2dq(&mut self, op0: Xmm, op1: Mem) {
873        self.emit(SSE_CVTTPD2DQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
874    }
875}
876
877/// `SSE_CVTTPS2DQ` (CVTTPS2DQ). 
878/// Converts four, eight or sixteen packed single precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.
879///
880///
881/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTPS2DQ.html).
882///
883/// Supported operand variants:
884///
885/// ```text
886/// +---+----------+
887/// | # | Operands |
888/// +---+----------+
889/// | 1 | Xmm, Mem |
890/// | 2 | Xmm, Xmm |
891/// +---+----------+
892/// ```
893pub trait SseCvttps2dqEmitter<A, B> {
894    fn sse_cvttps2dq(&mut self, op0: A, op1: B);
895}
896
897impl<'a> SseCvttps2dqEmitter<Xmm, Xmm> for Assembler<'a> {
898    fn sse_cvttps2dq(&mut self, op0: Xmm, op1: Xmm) {
899        self.emit(SSE_CVTTPS2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
900    }
901}
902
903impl<'a> SseCvttps2dqEmitter<Xmm, Mem> for Assembler<'a> {
904    fn sse_cvttps2dq(&mut self, op0: Xmm, op1: Mem) {
905        self.emit(SSE_CVTTPS2DQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
906    }
907}
908
909/// `SSE_CVTTSD2SI` (CVTTSD2SI). 
910/// Converts a double precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the double precision floating-point value is contained in the low quadword of the register.
911///
912///
913/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSD2SI.html).
914///
915/// Supported operand variants:
916///
917/// ```text
918/// +---+----------+
919/// | # | Operands |
920/// +---+----------+
921/// | 1 | Gpd, Mem |
922/// | 2 | Gpd, Xmm |
923/// | 3 | Gpq, Mem |
924/// | 4 | Gpq, Xmm |
925/// +---+----------+
926/// ```
927pub trait SseCvttsd2siEmitter<A, B> {
928    fn sse_cvttsd2si(&mut self, op0: A, op1: B);
929}
930
931impl<'a> SseCvttsd2siEmitter<Gpd, Xmm> for Assembler<'a> {
932    fn sse_cvttsd2si(&mut self, op0: Gpd, op1: Xmm) {
933        self.emit(SSE_CVTTSD2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
934    }
935}
936
937impl<'a> SseCvttsd2siEmitter<Gpd, Mem> for Assembler<'a> {
938    fn sse_cvttsd2si(&mut self, op0: Gpd, op1: Mem) {
939        self.emit(SSE_CVTTSD2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
940    }
941}
942
943impl<'a> SseCvttsd2siEmitter<Gpq, Xmm> for Assembler<'a> {
944    fn sse_cvttsd2si(&mut self, op0: Gpq, op1: Xmm) {
945        self.emit(SSE_CVTTSD2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
946    }
947}
948
949impl<'a> SseCvttsd2siEmitter<Gpq, Mem> for Assembler<'a> {
950    fn sse_cvttsd2si(&mut self, op0: Gpq, op1: Mem) {
951        self.emit(SSE_CVTTSD2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
952    }
953}
954
955/// `SSE_DIVPD` (DIVPD). 
956/// Performs a SIMD divide of the double precision floating-point values in the first source operand by the floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
957///
958///
959/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPD.html).
960///
961/// Supported operand variants:
962///
963/// ```text
964/// +---+----------+
965/// | # | Operands |
966/// +---+----------+
967/// | 1 | Xmm, Mem |
968/// | 2 | Xmm, Xmm |
969/// +---+----------+
970/// ```
971pub trait SseDivpdEmitter<A, B> {
972    fn sse_divpd(&mut self, op0: A, op1: B);
973}
974
975impl<'a> SseDivpdEmitter<Xmm, Xmm> for Assembler<'a> {
976    fn sse_divpd(&mut self, op0: Xmm, op1: Xmm) {
977        self.emit(SSE_DIVPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
978    }
979}
980
981impl<'a> SseDivpdEmitter<Xmm, Mem> for Assembler<'a> {
982    fn sse_divpd(&mut self, op0: Xmm, op1: Mem) {
983        self.emit(SSE_DIVPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
984    }
985}
986
987/// `SSE_DIVSD` (DIVSD). 
988/// Divides the low double precision floating-point value in the first source operand by the low double precision floating-point value in the second source operand, and stores the double precision floating-point result in the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source and destination are XMM registers.
989///
990///
991/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSD.html).
992///
993/// Supported operand variants:
994///
995/// ```text
996/// +---+----------+
997/// | # | Operands |
998/// +---+----------+
999/// | 1 | Xmm, Mem |
1000/// | 2 | Xmm, Xmm |
1001/// +---+----------+
1002/// ```
1003pub trait SseDivsdEmitter<A, B> {
1004    fn sse_divsd(&mut self, op0: A, op1: B);
1005}
1006
1007impl<'a> SseDivsdEmitter<Xmm, Xmm> for Assembler<'a> {
1008    fn sse_divsd(&mut self, op0: Xmm, op1: Xmm) {
1009        self.emit(SSE_DIVSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1010    }
1011}
1012
1013impl<'a> SseDivsdEmitter<Xmm, Mem> for Assembler<'a> {
1014    fn sse_divsd(&mut self, op0: Xmm, op1: Mem) {
1015        self.emit(SSE_DIVSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1016    }
1017}
1018
1019/// `SSE_MASKMOVDQU` (MASKMOVDQU). 
/// Stores selected bytes from the source operand (first operand) into a 128-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are XMM registers. The memory location is specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
1021///
1022///
1023/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVDQU.html).
1024///
1025/// Supported operand variants:
1026///
1027/// ```text
1028/// +---+----------+
1029/// | # | Operands |
1030/// +---+----------+
1031/// | 1 | Xmm, Xmm |
1032/// +---+----------+
1033/// ```
1034pub trait SseMaskmovdquEmitter<A, B> {
1035    fn sse_maskmovdqu(&mut self, op0: A, op1: B);
1036}
1037
1038impl<'a> SseMaskmovdquEmitter<Xmm, Xmm> for Assembler<'a> {
1039    fn sse_maskmovdqu(&mut self, op0: Xmm, op1: Xmm) {
1040        self.emit(SSE_MASKMOVDQURR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1041    }
1042}
1043
1044/// `SSE_MAXPD` (MAXPD). 
1045/// Performs a SIMD compare of the packed double precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
1046///
1047///
1048/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPD.html).
1049///
1050/// Supported operand variants:
1051///
1052/// ```text
1053/// +---+----------+
1054/// | # | Operands |
1055/// +---+----------+
1056/// | 1 | Xmm, Mem |
1057/// | 2 | Xmm, Xmm |
1058/// +---+----------+
1059/// ```
1060pub trait SseMaxpdEmitter<A, B> {
1061    fn sse_maxpd(&mut self, op0: A, op1: B);
1062}
1063
1064impl<'a> SseMaxpdEmitter<Xmm, Xmm> for Assembler<'a> {
1065    fn sse_maxpd(&mut self, op0: Xmm, op1: Xmm) {
1066        self.emit(SSE_MAXPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1067    }
1068}
1069
1070impl<'a> SseMaxpdEmitter<Xmm, Mem> for Assembler<'a> {
1071    fn sse_maxpd(&mut self, op0: Xmm, op1: Mem) {
1072        self.emit(SSE_MAXPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1073    }
1074}
1075
1076/// `SSE_MAXSD` (MAXSD). 
1077/// Compares the low double precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low quadword of the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source and destination operands are XMM registers. When the second source operand is a memory operand, only 64 bits are accessed.
1078///
1079///
1080/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSD.html).
1081///
1082/// Supported operand variants:
1083///
1084/// ```text
1085/// +---+----------+
1086/// | # | Operands |
1087/// +---+----------+
1088/// | 1 | Xmm, Mem |
1089/// | 2 | Xmm, Xmm |
1090/// +---+----------+
1091/// ```
1092pub trait SseMaxsdEmitter<A, B> {
1093    fn sse_maxsd(&mut self, op0: A, op1: B);
1094}
1095
1096impl<'a> SseMaxsdEmitter<Xmm, Xmm> for Assembler<'a> {
1097    fn sse_maxsd(&mut self, op0: Xmm, op1: Xmm) {
1098        self.emit(SSE_MAXSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1099    }
1100}
1101
1102impl<'a> SseMaxsdEmitter<Xmm, Mem> for Assembler<'a> {
1103    fn sse_maxsd(&mut self, op0: Xmm, op1: Mem) {
1104        self.emit(SSE_MAXSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1105    }
1106}
1107
1108/// `SSE_MINPD` (MINPD). 
1109/// Performs a SIMD compare of the packed double precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
1110///
1111///
1112/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPD.html).
1113///
1114/// Supported operand variants:
1115///
1116/// ```text
1117/// +---+----------+
1118/// | # | Operands |
1119/// +---+----------+
1120/// | 1 | Xmm, Mem |
1121/// | 2 | Xmm, Xmm |
1122/// +---+----------+
1123/// ```
1124pub trait SseMinpdEmitter<A, B> {
1125    fn sse_minpd(&mut self, op0: A, op1: B);
1126}
1127
1128impl<'a> SseMinpdEmitter<Xmm, Xmm> for Assembler<'a> {
1129    fn sse_minpd(&mut self, op0: Xmm, op1: Xmm) {
1130        self.emit(SSE_MINPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1131    }
1132}
1133
1134impl<'a> SseMinpdEmitter<Xmm, Mem> for Assembler<'a> {
1135    fn sse_minpd(&mut self, op0: Xmm, op1: Mem) {
1136        self.emit(SSE_MINPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1137    }
1138}
1139
1140/// `SSE_MINSD` (MINSD). 
1141/// Compares the low double precision floating-point values in the first source operand and the second source operand, and returns the minimum value to the low quadword of the destination operand. When the source operand is a memory operand, only the 64 bits are accessed.
1142///
1143///
1144/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSD.html).
1145///
1146/// Supported operand variants:
1147///
1148/// ```text
1149/// +---+----------+
1150/// | # | Operands |
1151/// +---+----------+
1152/// | 1 | Xmm, Mem |
1153/// | 2 | Xmm, Xmm |
1154/// +---+----------+
1155/// ```
1156pub trait SseMinsdEmitter<A, B> {
1157    fn sse_minsd(&mut self, op0: A, op1: B);
1158}
1159
1160impl<'a> SseMinsdEmitter<Xmm, Xmm> for Assembler<'a> {
1161    fn sse_minsd(&mut self, op0: Xmm, op1: Xmm) {
1162        self.emit(SSE_MINSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1163    }
1164}
1165
1166impl<'a> SseMinsdEmitter<Xmm, Mem> for Assembler<'a> {
1167    fn sse_minsd(&mut self, op0: Xmm, op1: Mem) {
1168        self.emit(SSE_MINSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1169    }
1170}
1171
1172/// `SSE_MOVAPD` (MOVAPD). 
1173/// Moves 2, 4 or 8 double precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from an 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
1174///
1175///
1176/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPD.html).
1177///
1178/// Supported operand variants:
1179///
1180/// ```text
1181/// +---+----------+
1182/// | # | Operands |
1183/// +---+----------+
1184/// | 1 | Mem, Xmm |
1185/// | 2 | Xmm, Mem |
1186/// | 3 | Xmm, Xmm |
1187/// +---+----------+
1188/// ```
1189pub trait SseMovapdEmitter<A, B> {
1190    fn sse_movapd(&mut self, op0: A, op1: B);
1191}
1192
1193impl<'a> SseMovapdEmitter<Xmm, Xmm> for Assembler<'a> {
1194    fn sse_movapd(&mut self, op0: Xmm, op1: Xmm) {
1195        self.emit(SSE_MOVAPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1196    }
1197}
1198
1199impl<'a> SseMovapdEmitter<Xmm, Mem> for Assembler<'a> {
1200    fn sse_movapd(&mut self, op0: Xmm, op1: Mem) {
1201        self.emit(SSE_MOVAPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1202    }
1203}
1204
1205impl<'a> SseMovapdEmitter<Mem, Xmm> for Assembler<'a> {
1206    fn sse_movapd(&mut self, op0: Mem, op1: Xmm) {
1207        self.emit(SSE_MOVAPDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1208    }
1209}
1210
1211/// `SSE_MOVDQA` (MOVDQA). 
1212/// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
1213///
1214///
1215/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQA%3AVMOVDQA32%3AVMOVDQA64.html).
1216///
1217/// Supported operand variants:
1218///
1219/// ```text
1220/// +---+----------+
1221/// | # | Operands |
1222/// +---+----------+
1223/// | 1 | Mem, Xmm |
1224/// | 2 | Xmm, Mem |
1225/// | 3 | Xmm, Xmm |
1226/// +---+----------+
1227/// ```
1228pub trait SseMovdqaEmitter<A, B> {
1229    fn sse_movdqa(&mut self, op0: A, op1: B);
1230}
1231
1232impl<'a> SseMovdqaEmitter<Xmm, Xmm> for Assembler<'a> {
1233    fn sse_movdqa(&mut self, op0: Xmm, op1: Xmm) {
1234        self.emit(SSE_MOVDQARR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1235    }
1236}
1237
1238impl<'a> SseMovdqaEmitter<Xmm, Mem> for Assembler<'a> {
1239    fn sse_movdqa(&mut self, op0: Xmm, op1: Mem) {
1240        self.emit(SSE_MOVDQARM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1241    }
1242}
1243
1244impl<'a> SseMovdqaEmitter<Mem, Xmm> for Assembler<'a> {
1245    fn sse_movdqa(&mut self, op0: Mem, op1: Xmm) {
1246        self.emit(SSE_MOVDQAMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1247    }
1248}
1249
1250/// `SSE_MOVDQU` (MOVDQU). 
1251/// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
1252///
1253///
1254/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQU%3AVMOVDQU8%3AVMOVDQU16%3AVMOVDQU32%3AVMOVDQU64.html).
1255///
1256/// Supported operand variants:
1257///
1258/// ```text
1259/// +---+----------+
1260/// | # | Operands |
1261/// +---+----------+
1262/// | 1 | Mem, Xmm |
1263/// | 2 | Xmm, Mem |
1264/// | 3 | Xmm, Xmm |
1265/// +---+----------+
1266/// ```
1267pub trait SseMovdquEmitter<A, B> {
1268    fn sse_movdqu(&mut self, op0: A, op1: B);
1269}
1270
1271impl<'a> SseMovdquEmitter<Xmm, Xmm> for Assembler<'a> {
1272    fn sse_movdqu(&mut self, op0: Xmm, op1: Xmm) {
1273        self.emit(SSE_MOVDQURR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1274    }
1275}
1276
1277impl<'a> SseMovdquEmitter<Xmm, Mem> for Assembler<'a> {
1278    fn sse_movdqu(&mut self, op0: Xmm, op1: Mem) {
1279        self.emit(SSE_MOVDQURM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1280    }
1281}
1282
1283impl<'a> SseMovdquEmitter<Mem, Xmm> for Assembler<'a> {
1284    fn sse_movdqu(&mut self, op0: Mem, op1: Xmm) {
1285        self.emit(SSE_MOVDQUMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1286    }
1287}
1288
1289/// `SSE_MOVD_G2X` (MOVD). 
1290/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1291///
1292///
1293/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1294///
1295/// Supported operand variants:
1296///
1297/// ```text
1298/// +---+----------+
1299/// | # | Operands |
1300/// +---+----------+
1301/// | 1 | Xmm, Gpd |
1302/// | 2 | Xmm, Mem |
1303/// +---+----------+
1304/// ```
1305pub trait SseMovdG2xEmitter<A, B> {
1306    fn sse_movd_g2x(&mut self, op0: A, op1: B);
1307}
1308
1309impl<'a> SseMovdG2xEmitter<Xmm, Gpd> for Assembler<'a> {
1310    fn sse_movd_g2x(&mut self, op0: Xmm, op1: Gpd) {
1311        self.emit(SSE_MOVD_G2XRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1312    }
1313}
1314
1315impl<'a> SseMovdG2xEmitter<Xmm, Mem> for Assembler<'a> {
1316    fn sse_movd_g2x(&mut self, op0: Xmm, op1: Mem) {
1317        self.emit(SSE_MOVD_G2XRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1318    }
1319}
1320
1321/// `SSE_MOVD_X2G` (MOVD). 
1322/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
1323///
1324///
1325/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1326///
1327/// Supported operand variants:
1328///
1329/// ```text
1330/// +---+----------+
1331/// | # | Operands |
1332/// +---+----------+
1333/// | 1 | Gpd, Xmm |
1334/// | 2 | Mem, Xmm |
1335/// +---+----------+
1336/// ```
1337pub trait SseMovdX2gEmitter<A, B> {
1338    fn sse_movd_x2g(&mut self, op0: A, op1: B);
1339}
1340
1341impl<'a> SseMovdX2gEmitter<Gpd, Xmm> for Assembler<'a> {
1342    fn sse_movd_x2g(&mut self, op0: Gpd, op1: Xmm) {
1343        self.emit(SSE_MOVD_X2GRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1344    }
1345}
1346
1347impl<'a> SseMovdX2gEmitter<Mem, Xmm> for Assembler<'a> {
1348    fn sse_movd_x2g(&mut self, op0: Mem, op1: Xmm) {
1349        self.emit(SSE_MOVD_X2GMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1350    }
1351}
1352
1353/// `SSE_MOVHPD` (MOVHPD). 
1354/// This instruction cannot be used for register to register or memory to memory moves.
1355///
1356///
1357/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPD.html).
1358///
1359/// Supported operand variants:
1360///
1361/// ```text
1362/// +---+----------+
1363/// | # | Operands |
1364/// +---+----------+
1365/// | 1 | Mem, Xmm |
1366/// | 2 | Xmm, Mem |
1367/// +---+----------+
1368/// ```
1369pub trait SseMovhpdEmitter<A, B> {
1370    fn sse_movhpd(&mut self, op0: A, op1: B);
1371}
1372
1373impl<'a> SseMovhpdEmitter<Xmm, Mem> for Assembler<'a> {
1374    fn sse_movhpd(&mut self, op0: Xmm, op1: Mem) {
1375        self.emit(SSE_MOVHPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1376    }
1377}
1378
1379impl<'a> SseMovhpdEmitter<Mem, Xmm> for Assembler<'a> {
1380    fn sse_movhpd(&mut self, op0: Mem, op1: Xmm) {
1381        self.emit(SSE_MOVHPDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1382    }
1383}
1384
1385/// `SSE_MOVLPD` (MOVLPD). 
1386/// This instruction cannot be used for register to register or memory to memory moves.
1387///
1388///
1389/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPD.html).
1390///
1391/// Supported operand variants:
1392///
1393/// ```text
1394/// +---+----------+
1395/// | # | Operands |
1396/// +---+----------+
1397/// | 1 | Mem, Xmm |
1398/// | 2 | Xmm, Mem |
1399/// +---+----------+
1400/// ```
1401pub trait SseMovlpdEmitter<A, B> {
1402    fn sse_movlpd(&mut self, op0: A, op1: B);
1403}
1404
1405impl<'a> SseMovlpdEmitter<Xmm, Mem> for Assembler<'a> {
1406    fn sse_movlpd(&mut self, op0: Xmm, op1: Mem) {
1407        self.emit(SSE_MOVLPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1408    }
1409}
1410
1411impl<'a> SseMovlpdEmitter<Mem, Xmm> for Assembler<'a> {
1412    fn sse_movlpd(&mut self, op0: Mem, op1: Xmm) {
1413        self.emit(SSE_MOVLPDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1414    }
1415}
1416
1417/// `SSE_MOVMSKPD` (MOVMSKPD). 
1418/// Extracts the sign bits from the packed double precision floating-point values in the source operand (second operand), formats them into a 2-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM register, and the destination operand is a general-purpose register. The mask is stored in the 2 low-order bits of the destination operand. Zero-extend the upper bits of the destination.
1419///
1420///
1421/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPD.html).
1422///
1423/// Supported operand variants:
1424///
1425/// ```text
1426/// +---+----------+
1427/// | # | Operands |
1428/// +---+----------+
1429/// | 1 | Gpq, Xmm |
1430/// +---+----------+
1431/// ```
1432pub trait SseMovmskpdEmitter<A, B> {
1433    fn sse_movmskpd(&mut self, op0: A, op1: B);
1434}
1435
1436impl<'a> SseMovmskpdEmitter<Gpq, Xmm> for Assembler<'a> {
1437    fn sse_movmskpd(&mut self, op0: Gpq, op1: Xmm) {
1438        self.emit(SSE_MOVMSKPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1439    }
1440}
1441
1442/// `SSE_MOVNTDQ` (MOVNTDQ). 
1443/// Moves the packed integers in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain integer data (packed bytes, words, double-words, or quadwords). The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (512-bit version) boundary otherwise a general-protection exception (#GP) will be generated.
1444///
1445///
1446/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTDQ.html).
1447///
1448/// Supported operand variants:
1449///
1450/// ```text
1451/// +---+----------+
1452/// | # | Operands |
1453/// +---+----------+
1454/// | 1 | Mem, Xmm |
1455/// +---+----------+
1456/// ```
1457pub trait SseMovntdqEmitter<A, B> {
1458    fn sse_movntdq(&mut self, op0: A, op1: B);
1459}
1460
1461impl<'a> SseMovntdqEmitter<Mem, Xmm> for Assembler<'a> {
1462    fn sse_movntdq(&mut self, op0: Mem, op1: Xmm) {
1463        self.emit(SSE_MOVNTDQMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1464    }
1465}
1466
1467/// `SSE_MOVNTPD` (MOVNTPD). 
1468/// Moves the packed double precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed double precision, floating-pointing data. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.
1469///
1470///
1471/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPD.html).
1472///
1473/// Supported operand variants:
1474///
1475/// ```text
1476/// +---+----------+
1477/// | # | Operands |
1478/// +---+----------+
1479/// | 1 | Mem, Xmm |
1480/// +---+----------+
1481/// ```
1482pub trait SseMovntpdEmitter<A, B> {
1483    fn sse_movntpd(&mut self, op0: A, op1: B);
1484}
1485
1486impl<'a> SseMovntpdEmitter<Mem, Xmm> for Assembler<'a> {
1487    fn sse_movntpd(&mut self, op0: Mem, op1: Xmm) {
1488        self.emit(SSE_MOVNTPDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1489    }
1490}
1491
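/// `SSE_MOVNTSD` (MOVNTSD).
/// Stores the low double precision floating-point value of the source XMM register (second operand) to a 64-bit memory location (first operand) using a non-temporal hint. MOVNTSD belongs to AMD's SSE4A extension, so it has no entry in the Intel manual.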
1493///
1494/// Supported operand variants:
1495///
1496/// ```text
1497/// +---+----------+
1498/// | # | Operands |
1499/// +---+----------+
1500/// | 1 | Mem, Xmm |
1501/// +---+----------+
1502/// ```
1503pub trait SseMovntsdEmitter<A, B> {
1504    fn sse_movntsd(&mut self, op0: A, op1: B);
1505}
1506
1507impl<'a> SseMovntsdEmitter<Mem, Xmm> for Assembler<'a> {
1508    fn sse_movntsd(&mut self, op0: Mem, op1: Xmm) {
1509        self.emit(SSE_MOVNTSDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1510    }
1511}
1512
1513/// `SSE_MOVQ` (MOVQ). 
/// Copies a quadword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 64-bit memory locations. This instruction can be used to move data between two XMM registers or between an XMM register and a 64-bit memory location; it cannot be used to transfer data between memory locations. When the source operand is an XMM register, the low quadword is moved; when the destination operand is an XMM register, the quadword is stored to the low quadword of the register, and the high quadword is cleared to all 0s.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVQ.html).
1518///
1519/// Supported operand variants:
1520///
1521/// ```text
1522/// +---+----------+
1523/// | # | Operands |
1524/// +---+----------+
1525/// | 1 | Mem, Xmm |
1526/// | 2 | Xmm, Mem |
1527/// | 3 | Xmm, Xmm |
1528/// +---+----------+
1529/// ```
1530pub trait SseMovqEmitter<A, B> {
1531    fn sse_movq(&mut self, op0: A, op1: B);
1532}
1533
1534impl<'a> SseMovqEmitter<Xmm, Xmm> for Assembler<'a> {
1535    fn sse_movq(&mut self, op0: Xmm, op1: Xmm) {
1536        self.emit(SSE_MOVQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1537    }
1538}
1539
1540impl<'a> SseMovqEmitter<Xmm, Mem> for Assembler<'a> {
1541    fn sse_movq(&mut self, op0: Xmm, op1: Mem) {
1542        self.emit(SSE_MOVQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1543    }
1544}
1545
1546impl<'a> SseMovqEmitter<Mem, Xmm> for Assembler<'a> {
1547    fn sse_movq(&mut self, op0: Mem, op1: Xmm) {
1548        self.emit(SSE_MOVQMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1549    }
1550}
1551
1552/// `SSE_MOVQ_G2X` (MOVQ). 
/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations. For the MOVQ form emitted here, the operation size is promoted to 64 bits (REX.W), so a quadword rather than a doubleword is transferred into the low quadword of the XMM register.
1554///
1555///
1556/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1557///
1558/// Supported operand variants:
1559///
1560/// ```text
1561/// +---+----------+
1562/// | # | Operands |
1563/// +---+----------+
1564/// | 1 | Xmm, Gpd |
1565/// | 2 | Xmm, Mem |
1566/// +---+----------+
1567/// ```
1568pub trait SseMovqG2xEmitter<A, B> {
1569    fn sse_movq_g2x(&mut self, op0: A, op1: B);
1570}
1571
1572impl<'a> SseMovqG2xEmitter<Xmm, Gpd> for Assembler<'a> {
1573    fn sse_movq_g2x(&mut self, op0: Xmm, op1: Gpd) {
1574        self.emit(SSE_MOVQ_G2XRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1575    }
1576}
1577
1578impl<'a> SseMovqG2xEmitter<Xmm, Mem> for Assembler<'a> {
1579    fn sse_movq_g2x(&mut self, op0: Xmm, op1: Mem) {
1580        self.emit(SSE_MOVQ_G2XRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1581    }
1582}
1583
1584/// `SSE_MOVQ_X2G` (MOVQ). 
/// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations. For the MOVQ form emitted here, the operation size is promoted to 64 bits (REX.W), so the low quadword of the XMM register rather than a doubleword is transferred.
1586///
1587///
1588/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
1589///
1590/// Supported operand variants:
1591///
1592/// ```text
1593/// +---+----------+
1594/// | # | Operands |
1595/// +---+----------+
1596/// | 1 | Gpd, Xmm |
1597/// | 2 | Mem, Xmm |
1598/// +---+----------+
1599/// ```
1600pub trait SseMovqX2gEmitter<A, B> {
1601    fn sse_movq_x2g(&mut self, op0: A, op1: B);
1602}
1603
1604impl<'a> SseMovqX2gEmitter<Gpd, Xmm> for Assembler<'a> {
1605    fn sse_movq_x2g(&mut self, op0: Gpd, op1: Xmm) {
1606        self.emit(SSE_MOVQ_X2GRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1607    }
1608}
1609
1610impl<'a> SseMovqX2gEmitter<Mem, Xmm> for Assembler<'a> {
1611    fn sse_movq_x2g(&mut self, op0: Mem, op1: Xmm) {
1612        self.emit(SSE_MOVQ_X2GMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1613    }
1614}
1615
1616/// `SSE_MOVSD` (MOVSD). 
/// Moves a scalar double precision floating-point value from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 64-bit memory locations. This instruction can be used to move a double precision floating-point value to and from the low quadword of an XMM register and a 64-bit memory location, or to move a double precision floating-point value between the low quadwords of two XMM registers. The instruction cannot be used to transfer data between memory locations.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVSD.html).
1621///
1622/// Supported operand variants:
1623///
1624/// ```text
1625/// +---+----------+
1626/// | # | Operands |
1627/// +---+----------+
1628/// | 1 | Mem, Xmm |
1629/// | 2 | Xmm, Mem |
1630/// | 3 | Xmm, Xmm |
1631/// +---+----------+
1632/// ```
1633pub trait SseMovsdEmitter<A, B> {
1634    fn sse_movsd(&mut self, op0: A, op1: B);
1635}
1636
1637impl<'a> SseMovsdEmitter<Xmm, Xmm> for Assembler<'a> {
1638    fn sse_movsd(&mut self, op0: Xmm, op1: Xmm) {
1639        self.emit(SSE_MOVSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1640    }
1641}
1642
1643impl<'a> SseMovsdEmitter<Xmm, Mem> for Assembler<'a> {
1644    fn sse_movsd(&mut self, op0: Xmm, op1: Mem) {
1645        self.emit(SSE_MOVSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1646    }
1647}
1648
1649impl<'a> SseMovsdEmitter<Mem, Xmm> for Assembler<'a> {
1650    fn sse_movsd(&mut self, op0: Mem, op1: Xmm) {
1651        self.emit(SSE_MOVSDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1652    }
1653}
1654
1655/// `SSE_MOVUPD` (MOVUPD). 
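/// Moves 128 bits of packed double precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM register from a 128-bit memory location, store the contents of an XMM register into a 128-bit memory location, or move data between two XMM registers. The memory operand need not be aligned on a 16-byte boundary. Note (VEX/EVEX-encoded forms): VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.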
1657///
1658///
1659/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPD.html).
1660///
1661/// Supported operand variants:
1662///
1663/// ```text
1664/// +---+----------+
1665/// | # | Operands |
1666/// +---+----------+
1667/// | 1 | Mem, Xmm |
1668/// | 2 | Xmm, Mem |
1669/// | 3 | Xmm, Xmm |
1670/// +---+----------+
1671/// ```
1672pub trait SseMovupdEmitter<A, B> {
1673    fn sse_movupd(&mut self, op0: A, op1: B);
1674}
1675
1676impl<'a> SseMovupdEmitter<Xmm, Xmm> for Assembler<'a> {
1677    fn sse_movupd(&mut self, op0: Xmm, op1: Xmm) {
1678        self.emit(SSE_MOVUPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1679    }
1680}
1681
1682impl<'a> SseMovupdEmitter<Xmm, Mem> for Assembler<'a> {
1683    fn sse_movupd(&mut self, op0: Xmm, op1: Mem) {
1684        self.emit(SSE_MOVUPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1685    }
1686}
1687
1688impl<'a> SseMovupdEmitter<Mem, Xmm> for Assembler<'a> {
1689    fn sse_movupd(&mut self, op0: Mem, op1: Xmm) {
1690        self.emit(SSE_MOVUPDMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1691    }
1692}
1693
1694/// `SSE_MULPD` (MULPD). 
1695/// Multiply packed double precision floating-point values from the first source operand with corresponding values in the second source operand, and stores the packed double precision floating-point results in the destination operand.
1696///
1697///
1698/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPD.html).
1699///
1700/// Supported operand variants:
1701///
1702/// ```text
1703/// +---+----------+
1704/// | # | Operands |
1705/// +---+----------+
1706/// | 1 | Xmm, Mem |
1707/// | 2 | Xmm, Xmm |
1708/// +---+----------+
1709/// ```
1710pub trait SseMulpdEmitter<A, B> {
1711    fn sse_mulpd(&mut self, op0: A, op1: B);
1712}
1713
1714impl<'a> SseMulpdEmitter<Xmm, Xmm> for Assembler<'a> {
1715    fn sse_mulpd(&mut self, op0: Xmm, op1: Xmm) {
1716        self.emit(SSE_MULPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1717    }
1718}
1719
1720impl<'a> SseMulpdEmitter<Xmm, Mem> for Assembler<'a> {
1721    fn sse_mulpd(&mut self, op0: Xmm, op1: Mem) {
1722        self.emit(SSE_MULPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1723    }
1724}
1725
1726/// `SSE_MULSD` (MULSD). 
1727/// Multiplies the low double precision floating-point value in the second source operand by the low double precision floating-point value in the first source operand, and stores the double precision floating-point result in the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source operand and the destination operands are XMM registers.
1728///
1729///
1730/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSD.html).
1731///
1732/// Supported operand variants:
1733///
1734/// ```text
1735/// +---+----------+
1736/// | # | Operands |
1737/// +---+----------+
1738/// | 1 | Xmm, Mem |
1739/// | 2 | Xmm, Xmm |
1740/// +---+----------+
1741/// ```
1742pub trait SseMulsdEmitter<A, B> {
1743    fn sse_mulsd(&mut self, op0: A, op1: B);
1744}
1745
1746impl<'a> SseMulsdEmitter<Xmm, Xmm> for Assembler<'a> {
1747    fn sse_mulsd(&mut self, op0: Xmm, op1: Xmm) {
1748        self.emit(SSE_MULSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1749    }
1750}
1751
1752impl<'a> SseMulsdEmitter<Xmm, Mem> for Assembler<'a> {
1753    fn sse_mulsd(&mut self, op0: Xmm, op1: Mem) {
1754        self.emit(SSE_MULSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1755    }
1756}
1757
1758/// `SSE_ORPD` (ORPD). 
1759/// Performs a bitwise logical OR of the two, four or eight packed double precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
1760///
1761///
1762/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPD.html).
1763///
1764/// Supported operand variants:
1765///
1766/// ```text
1767/// +---+----------+
1768/// | # | Operands |
1769/// +---+----------+
1770/// | 1 | Xmm, Mem |
1771/// | 2 | Xmm, Xmm |
1772/// +---+----------+
1773/// ```
1774pub trait SseOrpdEmitter<A, B> {
1775    fn sse_orpd(&mut self, op0: A, op1: B);
1776}
1777
1778impl<'a> SseOrpdEmitter<Xmm, Xmm> for Assembler<'a> {
1779    fn sse_orpd(&mut self, op0: Xmm, op1: Xmm) {
1780        self.emit(SSE_ORPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1781    }
1782}
1783
1784impl<'a> SseOrpdEmitter<Xmm, Mem> for Assembler<'a> {
1785    fn sse_orpd(&mut self, op0: Xmm, op1: Mem) {
1786        self.emit(SSE_ORPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1787    }
1788}
1789
1790/// `SSE_PACKSSDW` (PACKSSDW). 
1791/// Converts packed signed word integers into packed signed byte integers (PACKSSWB) or converts packed signed doubleword integers into packed signed word integers (PACKSSDW), using saturation to handle overflow conditions. See Figure 4-6 for an example of the packing operation.
1792///
1793///
1794/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKSSWB%3APACKSSDW.html).
1795///
1796/// Supported operand variants:
1797///
1798/// ```text
1799/// +---+----------+
1800/// | # | Operands |
1801/// +---+----------+
1802/// | 1 | Xmm, Mem |
1803/// | 2 | Xmm, Xmm |
1804/// +---+----------+
1805/// ```
1806pub trait SsePackssdwEmitter<A, B> {
1807    fn sse_packssdw(&mut self, op0: A, op1: B);
1808}
1809
1810impl<'a> SsePackssdwEmitter<Xmm, Xmm> for Assembler<'a> {
1811    fn sse_packssdw(&mut self, op0: Xmm, op1: Xmm) {
1812        self.emit(SSE_PACKSSDWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1813    }
1814}
1815
1816impl<'a> SsePackssdwEmitter<Xmm, Mem> for Assembler<'a> {
1817    fn sse_packssdw(&mut self, op0: Xmm, op1: Mem) {
1818        self.emit(SSE_PACKSSDWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1819    }
1820}
1821
1822/// `SSE_PACKSSWB` (PACKSSWB). 
1823/// Converts packed signed word integers into packed signed byte integers (PACKSSWB) or converts packed signed doubleword integers into packed signed word integers (PACKSSDW), using saturation to handle overflow conditions. See Figure 4-6 for an example of the packing operation.
1824///
1825///
1826/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKSSWB%3APACKSSDW.html).
1827///
1828/// Supported operand variants:
1829///
1830/// ```text
1831/// +---+----------+
1832/// | # | Operands |
1833/// +---+----------+
1834/// | 1 | Xmm, Mem |
1835/// | 2 | Xmm, Xmm |
1836/// +---+----------+
1837/// ```
1838pub trait SsePacksswbEmitter<A, B> {
1839    fn sse_packsswb(&mut self, op0: A, op1: B);
1840}
1841
1842impl<'a> SsePacksswbEmitter<Xmm, Xmm> for Assembler<'a> {
1843    fn sse_packsswb(&mut self, op0: Xmm, op1: Xmm) {
1844        self.emit(SSE_PACKSSWBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1845    }
1846}
1847
1848impl<'a> SsePacksswbEmitter<Xmm, Mem> for Assembler<'a> {
1849    fn sse_packsswb(&mut self, op0: Xmm, op1: Mem) {
1850        self.emit(SSE_PACKSSWBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1851    }
1852}
1853
1854/// `SSE_PACKUSWB` (PACKUSWB). 
1855/// Converts 4, 8, 16, or 32 signed word integers from the destination operand (first operand) and 4, 8, 16, or 32 signed word integers from the source operand (second operand) into 8, 16, 32 or 64 unsigned byte integers and stores the result in the destination operand. (See Figure 4-6 for an example of the packing operation.) If a signed word integer value is beyond the range of an unsigned byte integer (that is, greater than FFH or less than 00H), the saturated unsigned byte integer value of FFH or 00H, respectively, is stored in the destination.
1856///
1857///
1858/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKUSWB.html).
1859///
1860/// Supported operand variants:
1861///
1862/// ```text
1863/// +---+----------+
1864/// | # | Operands |
1865/// +---+----------+
1866/// | 1 | Xmm, Mem |
1867/// | 2 | Xmm, Xmm |
1868/// +---+----------+
1869/// ```
1870pub trait SsePackuswbEmitter<A, B> {
1871    fn sse_packuswb(&mut self, op0: A, op1: B);
1872}
1873
1874impl<'a> SsePackuswbEmitter<Xmm, Xmm> for Assembler<'a> {
1875    fn sse_packuswb(&mut self, op0: Xmm, op1: Xmm) {
1876        self.emit(SSE_PACKUSWBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1877    }
1878}
1879
1880impl<'a> SsePackuswbEmitter<Xmm, Mem> for Assembler<'a> {
1881    fn sse_packuswb(&mut self, op0: Xmm, op1: Mem) {
1882        self.emit(SSE_PACKUSWBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1883    }
1884}
1885
1886/// `SSE_PADDB` (PADDB). 
1887/// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
1888///
1889///
1890/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
1891///
1892/// Supported operand variants:
1893///
1894/// ```text
1895/// +---+----------+
1896/// | # | Operands |
1897/// +---+----------+
1898/// | 1 | Xmm, Mem |
1899/// | 2 | Xmm, Xmm |
1900/// +---+----------+
1901/// ```
1902pub trait SsePaddbEmitter<A, B> {
1903    fn sse_paddb(&mut self, op0: A, op1: B);
1904}
1905
1906impl<'a> SsePaddbEmitter<Xmm, Xmm> for Assembler<'a> {
1907    fn sse_paddb(&mut self, op0: Xmm, op1: Xmm) {
1908        self.emit(SSE_PADDBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1909    }
1910}
1911
1912impl<'a> SsePaddbEmitter<Xmm, Mem> for Assembler<'a> {
1913    fn sse_paddb(&mut self, op0: Xmm, op1: Mem) {
1914        self.emit(SSE_PADDBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1915    }
1916}
1917
1918/// `SSE_PADDD` (PADDD). 
1919/// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
1920///
1921///
1922/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
1923///
1924/// Supported operand variants:
1925///
1926/// ```text
1927/// +---+----------+
1928/// | # | Operands |
1929/// +---+----------+
1930/// | 1 | Xmm, Mem |
1931/// | 2 | Xmm, Xmm |
1932/// +---+----------+
1933/// ```
1934pub trait SsePadddEmitter<A, B> {
1935    fn sse_paddd(&mut self, op0: A, op1: B);
1936}
1937
1938impl<'a> SsePadddEmitter<Xmm, Xmm> for Assembler<'a> {
1939    fn sse_paddd(&mut self, op0: Xmm, op1: Xmm) {
1940        self.emit(SSE_PADDDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1941    }
1942}
1943
1944impl<'a> SsePadddEmitter<Xmm, Mem> for Assembler<'a> {
1945    fn sse_paddd(&mut self, op0: Xmm, op1: Mem) {
1946        self.emit(SSE_PADDDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1947    }
1948}
1949
1950/// `SSE_PADDQ` (PADDQ). 
1951/// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
1952///
1953///
1954/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
1955///
1956/// Supported operand variants:
1957///
1958/// ```text
1959/// +---+----------+
1960/// | # | Operands |
1961/// +---+----------+
1962/// | 1 | Xmm, Mem |
1963/// | 2 | Xmm, Xmm |
1964/// +---+----------+
1965/// ```
1966pub trait SsePaddqEmitter<A, B> {
1967    fn sse_paddq(&mut self, op0: A, op1: B);
1968}
1969
1970impl<'a> SsePaddqEmitter<Xmm, Xmm> for Assembler<'a> {
1971    fn sse_paddq(&mut self, op0: Xmm, op1: Xmm) {
1972        self.emit(SSE_PADDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1973    }
1974}
1975
1976impl<'a> SsePaddqEmitter<Xmm, Mem> for Assembler<'a> {
1977    fn sse_paddq(&mut self, op0: Xmm, op1: Mem) {
1978        self.emit(SSE_PADDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1979    }
1980}
1981
1982/// `SSE_PADDSB` (PADDSB). 
1983/// Performs a SIMD add of the packed signed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with signed saturation, as described in the following paragraphs.
1984///
1985///
1986/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDSB%3APADDSW.html).
1987///
1988/// Supported operand variants:
1989///
1990/// ```text
1991/// +---+----------+
1992/// | # | Operands |
1993/// +---+----------+
1994/// | 1 | Xmm, Mem |
1995/// | 2 | Xmm, Xmm |
1996/// +---+----------+
1997/// ```
1998pub trait SsePaddsbEmitter<A, B> {
1999    fn sse_paddsb(&mut self, op0: A, op1: B);
2000}
2001
2002impl<'a> SsePaddsbEmitter<Xmm, Xmm> for Assembler<'a> {
2003    fn sse_paddsb(&mut self, op0: Xmm, op1: Xmm) {
2004        self.emit(SSE_PADDSBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2005    }
2006}
2007
2008impl<'a> SsePaddsbEmitter<Xmm, Mem> for Assembler<'a> {
2009    fn sse_paddsb(&mut self, op0: Xmm, op1: Mem) {
2010        self.emit(SSE_PADDSBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2011    }
2012}
2013
2014/// `SSE_PADDSW` (PADDSW). 
2015/// Performs a SIMD add of the packed signed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with signed saturation, as described in the following paragraphs.
2016///
2017///
2018/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDSB%3APADDSW.html).
2019///
2020/// Supported operand variants:
2021///
2022/// ```text
2023/// +---+----------+
2024/// | # | Operands |
2025/// +---+----------+
2026/// | 1 | Xmm, Mem |
2027/// | 2 | Xmm, Xmm |
2028/// +---+----------+
2029/// ```
2030pub trait SsePaddswEmitter<A, B> {
2031    fn sse_paddsw(&mut self, op0: A, op1: B);
2032}
2033
2034impl<'a> SsePaddswEmitter<Xmm, Xmm> for Assembler<'a> {
2035    fn sse_paddsw(&mut self, op0: Xmm, op1: Xmm) {
2036        self.emit(SSE_PADDSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2037    }
2038}
2039
2040impl<'a> SsePaddswEmitter<Xmm, Mem> for Assembler<'a> {
2041    fn sse_paddsw(&mut self, op0: Xmm, op1: Mem) {
2042        self.emit(SSE_PADDSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2043    }
2044}
2045
2046/// `SSE_PADDUSB` (PADDUSB). 
2047/// Performs a SIMD add of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with unsigned saturation, as described in the following paragraphs.
2048///
2049///
2050/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDUSB%3APADDUSW.html).
2051///
2052/// Supported operand variants:
2053///
2054/// ```text
2055/// +---+----------+
2056/// | # | Operands |
2057/// +---+----------+
2058/// | 1 | Xmm, Mem |
2059/// | 2 | Xmm, Xmm |
2060/// +---+----------+
2061/// ```
2062pub trait SsePaddusbEmitter<A, B> {
2063    fn sse_paddusb(&mut self, op0: A, op1: B);
2064}
2065
2066impl<'a> SsePaddusbEmitter<Xmm, Xmm> for Assembler<'a> {
2067    fn sse_paddusb(&mut self, op0: Xmm, op1: Xmm) {
2068        self.emit(SSE_PADDUSBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2069    }
2070}
2071
2072impl<'a> SsePaddusbEmitter<Xmm, Mem> for Assembler<'a> {
2073    fn sse_paddusb(&mut self, op0: Xmm, op1: Mem) {
2074        self.emit(SSE_PADDUSBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2075    }
2076}
2077
2078/// `SSE_PADDUSW` (PADDUSW). 
2079/// Performs a SIMD add of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with unsigned saturation, as described in the following paragraphs.
2080///
2081///
2082/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDUSB%3APADDUSW.html).
2083///
2084/// Supported operand variants:
2085///
2086/// ```text
2087/// +---+----------+
2088/// | # | Operands |
2089/// +---+----------+
2090/// | 1 | Xmm, Mem |
2091/// | 2 | Xmm, Xmm |
2092/// +---+----------+
2093/// ```
2094pub trait SsePadduswEmitter<A, B> {
2095    fn sse_paddusw(&mut self, op0: A, op1: B);
2096}
2097
2098impl<'a> SsePadduswEmitter<Xmm, Xmm> for Assembler<'a> {
2099    fn sse_paddusw(&mut self, op0: Xmm, op1: Xmm) {
2100        self.emit(SSE_PADDUSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2101    }
2102}
2103
2104impl<'a> SsePadduswEmitter<Xmm, Mem> for Assembler<'a> {
2105    fn sse_paddusw(&mut self, op0: Xmm, op1: Mem) {
2106        self.emit(SSE_PADDUSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2107    }
2108}
2109
2110/// `SSE_PADDW` (PADDW). 
2111/// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
2112///
2113///
2114/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
2115///
2116/// Supported operand variants:
2117///
2118/// ```text
2119/// +---+----------+
2120/// | # | Operands |
2121/// +---+----------+
2122/// | 1 | Xmm, Mem |
2123/// | 2 | Xmm, Xmm |
2124/// +---+----------+
2125/// ```
2126pub trait SsePaddwEmitter<A, B> {
2127    fn sse_paddw(&mut self, op0: A, op1: B);
2128}
2129
2130impl<'a> SsePaddwEmitter<Xmm, Xmm> for Assembler<'a> {
2131    fn sse_paddw(&mut self, op0: Xmm, op1: Xmm) {
2132        self.emit(SSE_PADDWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2133    }
2134}
2135
2136impl<'a> SsePaddwEmitter<Xmm, Mem> for Assembler<'a> {
2137    fn sse_paddw(&mut self, op0: Xmm, op1: Mem) {
2138        self.emit(SSE_PADDWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2139    }
2140}
2141
2142/// `SSE_PAND` (PAND). 
2143/// Performs a bitwise logical AND operation on the first source operand and second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bits of the first and second operands are 1, otherwise it is set to 0.
2144///
2145///
2146/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAND.html).
2147///
2148/// Supported operand variants:
2149///
2150/// ```text
2151/// +---+----------+
2152/// | # | Operands |
2153/// +---+----------+
2154/// | 1 | Xmm, Mem |
2155/// | 2 | Xmm, Xmm |
2156/// +---+----------+
2157/// ```
2158pub trait SsePandEmitter<A, B> {
2159    fn sse_pand(&mut self, op0: A, op1: B);
2160}
2161
2162impl<'a> SsePandEmitter<Xmm, Xmm> for Assembler<'a> {
2163    fn sse_pand(&mut self, op0: Xmm, op1: Xmm) {
2164        self.emit(SSE_PANDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2165    }
2166}
2167
2168impl<'a> SsePandEmitter<Xmm, Mem> for Assembler<'a> {
2169    fn sse_pand(&mut self, op0: Xmm, op1: Mem) {
2170        self.emit(SSE_PANDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2171    }
2172}
2173
2174/// `SSE_PANDN` (PANDN). 
2175/// Performs a bitwise logical NOT operation on the first source operand, then performs bitwise AND with second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bit in the first operand is 0 and the corresponding bit in the second operand is 1, otherwise it is set to 0.
2176///
2177///
2178/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PANDN.html).
2179///
2180/// Supported operand variants:
2181///
2182/// ```text
2183/// +---+----------+
2184/// | # | Operands |
2185/// +---+----------+
2186/// | 1 | Xmm, Mem |
2187/// | 2 | Xmm, Xmm |
2188/// +---+----------+
2189/// ```
2190pub trait SsePandnEmitter<A, B> {
2191    fn sse_pandn(&mut self, op0: A, op1: B);
2192}
2193
2194impl<'a> SsePandnEmitter<Xmm, Xmm> for Assembler<'a> {
2195    fn sse_pandn(&mut self, op0: Xmm, op1: Xmm) {
2196        self.emit(SSE_PANDNRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2197    }
2198}
2199
2200impl<'a> SsePandnEmitter<Xmm, Mem> for Assembler<'a> {
2201    fn sse_pandn(&mut self, op0: Xmm, op1: Mem) {
2202        self.emit(SSE_PANDNRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2203    }
2204}
2205
2206/// `SSE_PAVGB` (PAVGB). 
2207/// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2208///
2209///
2210/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2211///
2212/// Supported operand variants:
2213///
2214/// ```text
2215/// +---+----------+
2216/// | # | Operands |
2217/// +---+----------+
2218/// | 1 | Xmm, Mem |
2219/// | 2 | Xmm, Xmm |
2220/// +---+----------+
2221/// ```
2222pub trait SsePavgbEmitter<A, B> {
2223    fn sse_pavgb(&mut self, op0: A, op1: B);
2224}
2225
2226impl<'a> SsePavgbEmitter<Xmm, Xmm> for Assembler<'a> {
2227    fn sse_pavgb(&mut self, op0: Xmm, op1: Xmm) {
2228        self.emit(SSE_PAVGBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2229    }
2230}
2231
2232impl<'a> SsePavgbEmitter<Xmm, Mem> for Assembler<'a> {
2233    fn sse_pavgb(&mut self, op0: Xmm, op1: Mem) {
2234        self.emit(SSE_PAVGBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2235    }
2236}
2237
2238/// `SSE_PAVGW` (PAVGW). 
2239/// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2240///
2241///
2242/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2243///
2244/// Supported operand variants:
2245///
2246/// ```text
2247/// +---+----------+
2248/// | # | Operands |
2249/// +---+----------+
2250/// | 1 | Xmm, Mem |
2251/// | 2 | Xmm, Xmm |
2252/// +---+----------+
2253/// ```
2254pub trait SsePavgwEmitter<A, B> {
2255    fn sse_pavgw(&mut self, op0: A, op1: B);
2256}
2257
2258impl<'a> SsePavgwEmitter<Xmm, Xmm> for Assembler<'a> {
2259    fn sse_pavgw(&mut self, op0: Xmm, op1: Xmm) {
2260        self.emit(SSE_PAVGWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2261    }
2262}
2263
2264impl<'a> SsePavgwEmitter<Xmm, Mem> for Assembler<'a> {
2265    fn sse_pavgw(&mut self, op0: Xmm, op1: Mem) {
2266        self.emit(SSE_PAVGWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2267    }
2268}
2269
2270/// `SSE_PCMPEQB` (PCMPEQB). 
2271/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2272///
2273///
2274/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
2275///
2276/// Supported operand variants:
2277///
2278/// ```text
2279/// +---+----------+
2280/// | # | Operands |
2281/// +---+----------+
2282/// | 1 | Xmm, Mem |
2283/// | 2 | Xmm, Xmm |
2284/// +---+----------+
2285/// ```
2286pub trait SsePcmpeqbEmitter<A, B> {
2287    fn sse_pcmpeqb(&mut self, op0: A, op1: B);
2288}
2289
2290impl<'a> SsePcmpeqbEmitter<Xmm, Xmm> for Assembler<'a> {
2291    fn sse_pcmpeqb(&mut self, op0: Xmm, op1: Xmm) {
2292        self.emit(SSE_PCMPEQBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2293    }
2294}
2295
2296impl<'a> SsePcmpeqbEmitter<Xmm, Mem> for Assembler<'a> {
2297    fn sse_pcmpeqb(&mut self, op0: Xmm, op1: Mem) {
2298        self.emit(SSE_PCMPEQBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2299    }
2300}
2301
2302/// `SSE_PCMPEQD` (PCMPEQD). 
2303/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2304///
2305///
2306/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
2307///
2308/// Supported operand variants:
2309///
2310/// ```text
2311/// +---+----------+
2312/// | # | Operands |
2313/// +---+----------+
2314/// | 1 | Xmm, Mem |
2315/// | 2 | Xmm, Xmm |
2316/// +---+----------+
2317/// ```
2318pub trait SsePcmpeqdEmitter<A, B> {
2319    fn sse_pcmpeqd(&mut self, op0: A, op1: B);
2320}
2321
2322impl<'a> SsePcmpeqdEmitter<Xmm, Xmm> for Assembler<'a> {
2323    fn sse_pcmpeqd(&mut self, op0: Xmm, op1: Xmm) {
2324        self.emit(SSE_PCMPEQDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2325    }
2326}
2327
2328impl<'a> SsePcmpeqdEmitter<Xmm, Mem> for Assembler<'a> {
2329    fn sse_pcmpeqd(&mut self, op0: Xmm, op1: Mem) {
2330        self.emit(SSE_PCMPEQDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2331    }
2332}
2333
2334/// `SSE_PCMPEQW` (PCMPEQW). 
2335/// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2336///
2337///
2338/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
2339///
2340/// Supported operand variants:
2341///
2342/// ```text
2343/// +---+----------+
2344/// | # | Operands |
2345/// +---+----------+
2346/// | 1 | Xmm, Mem |
2347/// | 2 | Xmm, Xmm |
2348/// +---+----------+
2349/// ```
2350pub trait SsePcmpeqwEmitter<A, B> {
2351    fn sse_pcmpeqw(&mut self, op0: A, op1: B);
2352}
2353
2354impl<'a> SsePcmpeqwEmitter<Xmm, Xmm> for Assembler<'a> {
2355    fn sse_pcmpeqw(&mut self, op0: Xmm, op1: Xmm) {
2356        self.emit(SSE_PCMPEQWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2357    }
2358}
2359
2360impl<'a> SsePcmpeqwEmitter<Xmm, Mem> for Assembler<'a> {
2361    fn sse_pcmpeqw(&mut self, op0: Xmm, op1: Mem) {
2362        self.emit(SSE_PCMPEQWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2363    }
2364}
2365
2366/// `SSE_PCMPGTB` (PCMPGTB). 
/// Performs a SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding data element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2368///
2369///
2370/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2371///
2372/// Supported operand variants:
2373///
2374/// ```text
2375/// +---+----------+
2376/// | # | Operands |
2377/// +---+----------+
2378/// | 1 | Xmm, Mem |
2379/// | 2 | Xmm, Xmm |
2380/// +---+----------+
2381/// ```
2382pub trait SsePcmpgtbEmitter<A, B> {
2383    fn sse_pcmpgtb(&mut self, op0: A, op1: B);
2384}
2385
2386impl<'a> SsePcmpgtbEmitter<Xmm, Xmm> for Assembler<'a> {
2387    fn sse_pcmpgtb(&mut self, op0: Xmm, op1: Xmm) {
2388        self.emit(SSE_PCMPGTBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2389    }
2390}
2391
2392impl<'a> SsePcmpgtbEmitter<Xmm, Mem> for Assembler<'a> {
2393    fn sse_pcmpgtb(&mut self, op0: Xmm, op1: Mem) {
2394        self.emit(SSE_PCMPGTBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2395    }
2396}
2397
2398/// `SSE_PCMPGTD` (PCMPGTD). 
/// Performs a SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding data element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2400///
2401///
2402/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2403///
2404/// Supported operand variants:
2405///
2406/// ```text
2407/// +---+----------+
2408/// | # | Operands |
2409/// +---+----------+
2410/// | 1 | Xmm, Mem |
2411/// | 2 | Xmm, Xmm |
2412/// +---+----------+
2413/// ```
2414pub trait SsePcmpgtdEmitter<A, B> {
2415    fn sse_pcmpgtd(&mut self, op0: A, op1: B);
2416}
2417
2418impl<'a> SsePcmpgtdEmitter<Xmm, Xmm> for Assembler<'a> {
2419    fn sse_pcmpgtd(&mut self, op0: Xmm, op1: Xmm) {
2420        self.emit(SSE_PCMPGTDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2421    }
2422}
2423
2424impl<'a> SsePcmpgtdEmitter<Xmm, Mem> for Assembler<'a> {
2425    fn sse_pcmpgtd(&mut self, op0: Xmm, op1: Mem) {
2426        self.emit(SSE_PCMPGTDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2427    }
2428}
2429
2430/// `SSE_PCMPGTW` (PCMPGTW). 
2431/// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
2432///
2433///
2434/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
2435///
2436/// Supported operand variants:
2437///
2438/// ```text
2439/// +---+----------+
2440/// | # | Operands |
2441/// +---+----------+
2442/// | 1 | Xmm, Mem |
2443/// | 2 | Xmm, Xmm |
2444/// +---+----------+
2445/// ```
2446pub trait SsePcmpgtwEmitter<A, B> {
2447    fn sse_pcmpgtw(&mut self, op0: A, op1: B);
2448}
2449
2450impl<'a> SsePcmpgtwEmitter<Xmm, Xmm> for Assembler<'a> {
2451    fn sse_pcmpgtw(&mut self, op0: Xmm, op1: Xmm) {
2452        self.emit(SSE_PCMPGTWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2453    }
2454}
2455
2456impl<'a> SsePcmpgtwEmitter<Xmm, Mem> for Assembler<'a> {
2457    fn sse_pcmpgtw(&mut self, op0: Xmm, op1: Mem) {
2458        self.emit(SSE_PCMPGTWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2459    }
2460}
2461
2462/// `SSE_PEXTRW` (PEXTRW). 
2463/// Copies the word in the source operand (second operand) specified by the count operand (third operand) to the destination operand (first operand). The source operand can be an MMX technology register or an XMM register. The destination operand can be the low word of a general-purpose register or a 16-bit memory address. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location. The content of the destination register above bit 16 is cleared (set to all 0s).
2464///
2465///
2466/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
2467///
2468/// Supported operand variants:
2469///
2470/// ```text
2471/// +---+---------------+
2472/// | # | Operands      |
2473/// +---+---------------+
2474/// | 1 | Gpd, Xmm, Imm |
2475/// | 2 | Mem, Xmm, Imm |
2476/// +---+---------------+
2477/// ```
2478pub trait SsePextrwEmitter<A, B, C> {
2479    fn sse_pextrw(&mut self, op0: A, op1: B, op2: C);
2480}
2481
2482impl<'a> SsePextrwEmitter<Gpd, Xmm, Imm> for Assembler<'a> {
2483    fn sse_pextrw(&mut self, op0: Gpd, op1: Xmm, op2: Imm) {
2484        self.emit(SSE_PEXTRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2485    }
2486}
2487
2488impl<'a> SsePextrwEmitter<Mem, Xmm, Imm> for Assembler<'a> {
2489    fn sse_pextrw(&mut self, op0: Mem, op1: Xmm, op2: Imm) {
2490        self.emit(SSE_PEXTRWMRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2491    }
2492}
2493
2494/// `SSE_PINSRW` (PINSRW). 
2495/// Three operand MMX and SSE instructions
2496///
2497///
2498/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
2499///
2500/// Supported operand variants:
2501///
2502/// ```text
2503/// +---+---------------+
2504/// | # | Operands      |
2505/// +---+---------------+
2506/// | 1 | Xmm, Gpd, Imm |
2507/// | 2 | Xmm, Mem, Imm |
2508/// +---+---------------+
2509/// ```
2510pub trait SsePinsrwEmitter<A, B, C> {
2511    fn sse_pinsrw(&mut self, op0: A, op1: B, op2: C);
2512}
2513
2514impl<'a> SsePinsrwEmitter<Xmm, Gpd, Imm> for Assembler<'a> {
2515    fn sse_pinsrw(&mut self, op0: Xmm, op1: Gpd, op2: Imm) {
2516        self.emit(SSE_PINSRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2517    }
2518}
2519
2520impl<'a> SsePinsrwEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2521    fn sse_pinsrw(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2522        self.emit(SSE_PINSRWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2523    }
2524}
2525
2526/// `SSE_PMADDWD` (PMADDWD). 
2527/// Multiplies the individual signed words of the destination operand (first operand) by the corresponding signed words of the source operand (second operand), producing temporary signed, doubleword results. The adjacent double-word results are then summed and stored in the destination operand. For example, the corresponding low-order words (15-0) and (31-16) in the source and destination operands are multiplied by one another and the double-word results are added together and stored in the low doubleword of the destination register (31-0). The same operation is performed on the other pairs of adjacent words. (Figure 4-11 shows this operation when using 64-bit operands).
2528///
2529///
2530/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMADDWD.html).
2531///
2532/// Supported operand variants:
2533///
2534/// ```text
2535/// +---+----------+
2536/// | # | Operands |
2537/// +---+----------+
2538/// | 1 | Xmm, Mem |
2539/// | 2 | Xmm, Xmm |
2540/// +---+----------+
2541/// ```
2542pub trait SsePmaddwdEmitter<A, B> {
2543    fn sse_pmaddwd(&mut self, op0: A, op1: B);
2544}
2545
2546impl<'a> SsePmaddwdEmitter<Xmm, Xmm> for Assembler<'a> {
2547    fn sse_pmaddwd(&mut self, op0: Xmm, op1: Xmm) {
2548        self.emit(SSE_PMADDWDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2549    }
2550}
2551
2552impl<'a> SsePmaddwdEmitter<Xmm, Mem> for Assembler<'a> {
2553    fn sse_pmaddwd(&mut self, op0: Xmm, op1: Mem) {
2554        self.emit(SSE_PMADDWDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2555    }
2556}
2557
2558/// `SSE_PMAXSW` (PMAXSW). 
2559/// Performs a SIMD compare of the packed signed byte, word, dword or qword integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2560///
2561///
2562/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
2563///
2564/// Supported operand variants:
2565///
2566/// ```text
2567/// +---+----------+
2568/// | # | Operands |
2569/// +---+----------+
2570/// | 1 | Xmm, Mem |
2571/// | 2 | Xmm, Xmm |
2572/// +---+----------+
2573/// ```
2574pub trait SsePmaxswEmitter<A, B> {
2575    fn sse_pmaxsw(&mut self, op0: A, op1: B);
2576}
2577
2578impl<'a> SsePmaxswEmitter<Xmm, Xmm> for Assembler<'a> {
2579    fn sse_pmaxsw(&mut self, op0: Xmm, op1: Xmm) {
2580        self.emit(SSE_PMAXSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2581    }
2582}
2583
2584impl<'a> SsePmaxswEmitter<Xmm, Mem> for Assembler<'a> {
2585    fn sse_pmaxsw(&mut self, op0: Xmm, op1: Mem) {
2586        self.emit(SSE_PMAXSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2587    }
2588}
2589
2590/// `SSE_PMAXUB` (PMAXUB). 
2591/// Performs a SIMD compare of the packed unsigned byte, word integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2592///
2593///
2594/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
2595///
2596/// Supported operand variants:
2597///
2598/// ```text
2599/// +---+----------+
2600/// | # | Operands |
2601/// +---+----------+
2602/// | 1 | Xmm, Mem |
2603/// | 2 | Xmm, Xmm |
2604/// +---+----------+
2605/// ```
2606pub trait SsePmaxubEmitter<A, B> {
2607    fn sse_pmaxub(&mut self, op0: A, op1: B);
2608}
2609
2610impl<'a> SsePmaxubEmitter<Xmm, Xmm> for Assembler<'a> {
2611    fn sse_pmaxub(&mut self, op0: Xmm, op1: Xmm) {
2612        self.emit(SSE_PMAXUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2613    }
2614}
2615
2616impl<'a> SsePmaxubEmitter<Xmm, Mem> for Assembler<'a> {
2617    fn sse_pmaxub(&mut self, op0: Xmm, op1: Mem) {
2618        self.emit(SSE_PMAXUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2619    }
2620}
2621
2622/// `SSE_PMINSW` (PMINSW). 
2623/// Performs a SIMD compare of the packed signed byte, word, or dword integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2624///
2625///
2626/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
2627///
2628/// Supported operand variants:
2629///
2630/// ```text
2631/// +---+----------+
2632/// | # | Operands |
2633/// +---+----------+
2634/// | 1 | Xmm, Mem |
2635/// | 2 | Xmm, Xmm |
2636/// +---+----------+
2637/// ```
2638pub trait SsePminswEmitter<A, B> {
2639    fn sse_pminsw(&mut self, op0: A, op1: B);
2640}
2641
2642impl<'a> SsePminswEmitter<Xmm, Xmm> for Assembler<'a> {
2643    fn sse_pminsw(&mut self, op0: Xmm, op1: Xmm) {
2644        self.emit(SSE_PMINSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2645    }
2646}
2647
2648impl<'a> SsePminswEmitter<Xmm, Mem> for Assembler<'a> {
2649    fn sse_pminsw(&mut self, op0: Xmm, op1: Mem) {
2650        self.emit(SSE_PMINSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2651    }
2652}
2653
2654/// `SSE_PMINUB` (PMINUB). 
2655/// Performs a SIMD compare of the packed unsigned byte or word integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2656///
2657///
2658/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
2659///
2660/// Supported operand variants:
2661///
2662/// ```text
2663/// +---+----------+
2664/// | # | Operands |
2665/// +---+----------+
2666/// | 1 | Xmm, Mem |
2667/// | 2 | Xmm, Xmm |
2668/// +---+----------+
2669/// ```
2670pub trait SsePminubEmitter<A, B> {
2671    fn sse_pminub(&mut self, op0: A, op1: B);
2672}
2673
2674impl<'a> SsePminubEmitter<Xmm, Xmm> for Assembler<'a> {
2675    fn sse_pminub(&mut self, op0: Xmm, op1: Xmm) {
2676        self.emit(SSE_PMINUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2677    }
2678}
2679
2680impl<'a> SsePminubEmitter<Xmm, Mem> for Assembler<'a> {
2681    fn sse_pminub(&mut self, op0: Xmm, op1: Mem) {
2682        self.emit(SSE_PMINUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2683    }
2684}
2685
2686/// `SSE_PMOVMSKB` (PMOVMSKB). 
2687/// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
2688///
2689///
2690/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
2691///
2692/// Supported operand variants:
2693///
2694/// ```text
2695/// +---+----------+
2696/// | # | Operands |
2697/// +---+----------+
2698/// | 1 | Gpq, Xmm |
2699/// +---+----------+
2700/// ```
2701pub trait SsePmovmskbEmitter<A, B> {
2702    fn sse_pmovmskb(&mut self, op0: A, op1: B);
2703}
2704
2705impl<'a> SsePmovmskbEmitter<Gpq, Xmm> for Assembler<'a> {
2706    fn sse_pmovmskb(&mut self, op0: Gpq, op1: Xmm) {
2707        self.emit(SSE_PMOVMSKBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2708    }
2709}
2710
2711/// `SSE_PMULHUW` (PMULHUW). 
2712/// Performs a SIMD unsigned multiply of the packed unsigned word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each 32-bit intermediate results in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
2713///
2714///
2715/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
2716///
2717/// Supported operand variants:
2718///
2719/// ```text
2720/// +---+----------+
2721/// | # | Operands |
2722/// +---+----------+
2723/// | 1 | Xmm, Mem |
2724/// | 2 | Xmm, Xmm |
2725/// +---+----------+
2726/// ```
2727pub trait SsePmulhuwEmitter<A, B> {
2728    fn sse_pmulhuw(&mut self, op0: A, op1: B);
2729}
2730
2731impl<'a> SsePmulhuwEmitter<Xmm, Xmm> for Assembler<'a> {
2732    fn sse_pmulhuw(&mut self, op0: Xmm, op1: Xmm) {
2733        self.emit(SSE_PMULHUWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2734    }
2735}
2736
2737impl<'a> SsePmulhuwEmitter<Xmm, Mem> for Assembler<'a> {
2738    fn sse_pmulhuw(&mut self, op0: Xmm, op1: Mem) {
2739        self.emit(SSE_PMULHUWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2740    }
2741}
2742
2743/// `SSE_PMULHW` (PMULHW). 
2744/// Performs a SIMD signed multiply of the packed signed word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each intermediate 32-bit result in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
2745///
2746///
2747/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHW.html).
2748///
2749/// Supported operand variants:
2750///
2751/// ```text
2752/// +---+----------+
2753/// | # | Operands |
2754/// +---+----------+
2755/// | 1 | Xmm, Mem |
2756/// | 2 | Xmm, Xmm |
2757/// +---+----------+
2758/// ```
2759pub trait SsePmulhwEmitter<A, B> {
2760    fn sse_pmulhw(&mut self, op0: A, op1: B);
2761}
2762
2763impl<'a> SsePmulhwEmitter<Xmm, Xmm> for Assembler<'a> {
2764    fn sse_pmulhw(&mut self, op0: Xmm, op1: Xmm) {
2765        self.emit(SSE_PMULHWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2766    }
2767}
2768
2769impl<'a> SsePmulhwEmitter<Xmm, Mem> for Assembler<'a> {
2770    fn sse_pmulhw(&mut self, op0: Xmm, op1: Mem) {
2771        self.emit(SSE_PMULHWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2772    }
2773}
2774
2775/// `SSE_PMULLW` (PMULLW). 
2776/// Performs a SIMD signed multiply of the packed signed word integers in the destination operand (first operand) and the source operand (second operand), and stores the low 16 bits of each intermediate 32-bit result in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
2777///
2778///
2779/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULLW.html).
2780///
2781/// Supported operand variants:
2782///
2783/// ```text
2784/// +---+----------+
2785/// | # | Operands |
2786/// +---+----------+
2787/// | 1 | Xmm, Mem |
2788/// | 2 | Xmm, Xmm |
2789/// +---+----------+
2790/// ```
2791pub trait SsePmullwEmitter<A, B> {
2792    fn sse_pmullw(&mut self, op0: A, op1: B);
2793}
2794
2795impl<'a> SsePmullwEmitter<Xmm, Xmm> for Assembler<'a> {
2796    fn sse_pmullw(&mut self, op0: Xmm, op1: Xmm) {
2797        self.emit(SSE_PMULLWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2798    }
2799}
2800
2801impl<'a> SsePmullwEmitter<Xmm, Mem> for Assembler<'a> {
2802    fn sse_pmullw(&mut self, op0: Xmm, op1: Mem) {
2803        self.emit(SSE_PMULLWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2804    }
2805}
2806
2807/// `SSE_PMULUDQ` (PMULUDQ). 
2808/// Multiplies the first operand (destination operand) by the second operand (source operand) and stores the result in the destination operand.
2809///
2810///
2811/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULUDQ.html).
2812///
2813/// Supported operand variants:
2814///
2815/// ```text
2816/// +---+----------+
2817/// | # | Operands |
2818/// +---+----------+
2819/// | 1 | Xmm, Mem |
2820/// | 2 | Xmm, Xmm |
2821/// +---+----------+
2822/// ```
2823pub trait SsePmuludqEmitter<A, B> {
2824    fn sse_pmuludq(&mut self, op0: A, op1: B);
2825}
2826
2827impl<'a> SsePmuludqEmitter<Xmm, Xmm> for Assembler<'a> {
2828    fn sse_pmuludq(&mut self, op0: Xmm, op1: Xmm) {
2829        self.emit(SSE_PMULUDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2830    }
2831}
2832
2833impl<'a> SsePmuludqEmitter<Xmm, Mem> for Assembler<'a> {
2834    fn sse_pmuludq(&mut self, op0: Xmm, op1: Mem) {
2835        self.emit(SSE_PMULUDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2836    }
2837}
2838
2839/// `SSE_POR` (POR). 
2840/// Performs a bitwise logical OR operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is set to 1 if either or both of the corresponding bits of the first and second operands are 1; otherwise, it is set to 0.
2841///
2842///
2843/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/POR.html).
2844///
2845/// Supported operand variants:
2846///
2847/// ```text
2848/// +---+----------+
2849/// | # | Operands |
2850/// +---+----------+
2851/// | 1 | Xmm, Mem |
2852/// | 2 | Xmm, Xmm |
2853/// +---+----------+
2854/// ```
2855pub trait SsePorEmitter<A, B> {
2856    fn sse_por(&mut self, op0: A, op1: B);
2857}
2858
2859impl<'a> SsePorEmitter<Xmm, Xmm> for Assembler<'a> {
2860    fn sse_por(&mut self, op0: Xmm, op1: Xmm) {
2861        self.emit(SSE_PORRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2862    }
2863}
2864
2865impl<'a> SsePorEmitter<Xmm, Mem> for Assembler<'a> {
2866    fn sse_por(&mut self, op0: Xmm, op1: Mem) {
2867        self.emit(SSE_PORRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2868    }
2869}
2870
2871/// `SSE_PSADBW` (PSADBW). 
2872/// Computes the absolute value of the difference of 8 unsigned byte integers from the source operand (second operand) and from the destination operand (first operand). These 8 differences are then summed to produce an unsigned word integer result that is stored in the destination operand. Figure 4-14 shows the operation of the PSADBW instruction when using 64-bit operands.
2873///
2874///
2875/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
2876///
2877/// Supported operand variants:
2878///
2879/// ```text
2880/// +---+----------+
2881/// | # | Operands |
2882/// +---+----------+
2883/// | 1 | Xmm, Mem |
2884/// | 2 | Xmm, Xmm |
2885/// +---+----------+
2886/// ```
2887pub trait SsePsadbwEmitter<A, B> {
2888    fn sse_psadbw(&mut self, op0: A, op1: B);
2889}
2890
2891impl<'a> SsePsadbwEmitter<Xmm, Xmm> for Assembler<'a> {
2892    fn sse_psadbw(&mut self, op0: Xmm, op1: Xmm) {
2893        self.emit(SSE_PSADBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2894    }
2895}
2896
2897impl<'a> SsePsadbwEmitter<Xmm, Mem> for Assembler<'a> {
2898    fn sse_psadbw(&mut self, op0: Xmm, op1: Mem) {
2899        self.emit(SSE_PSADBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
2900    }
2901}
2902
2903/// `SSE_PSHUFD` (PSHUFD). 
2904/// Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.
2905///
2906///
2907/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFD.html).
2908///
2909/// Supported operand variants:
2910///
2911/// ```text
2912/// +---+---------------+
2913/// | # | Operands      |
2914/// +---+---------------+
2915/// | 1 | Xmm, Mem, Imm |
2916/// | 2 | Xmm, Xmm, Imm |
2917/// +---+---------------+
2918/// ```
2919pub trait SsePshufdEmitter<A, B, C> {
2920    fn sse_pshufd(&mut self, op0: A, op1: B, op2: C);
2921}
2922
2923impl<'a> SsePshufdEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2924    fn sse_pshufd(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2925        self.emit(SSE_PSHUFDRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2926    }
2927}
2928
2929impl<'a> SsePshufdEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2930    fn sse_pshufd(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2931        self.emit(SSE_PSHUFDRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2932    }
2933}
2934
2935/// `SSE_PSHUFHW` (PSHUFHW). 
2936/// Copies words from the high quadword of a 128-bit lane of the source operand and inserts them in the high quadword of the destination operand at word locations (of the respective lane) selected with the immediate operand. This 256-bit operation is similar to the in-lane operation used by the 256-bit VPSHUFD instruction, which is illustrated in Figure 4-16. For 128-bit operation, only the low 128-bit lane is operative. Each 2-bit field in the immediate operand selects the contents of one word location in the high quadword of the destination operand. The binary encodings of the immediate operand fields select words (0, 1, 2 or 3, 4) from the high quadword of the source operand to be copied to the destination operand. The low quadword of the source operand is copied to the low quadword of the destination operand, for each 128-bit lane.
2937///
2938///
2939/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFHW.html).
2940///
2941/// Supported operand variants:
2942///
2943/// ```text
2944/// +---+---------------+
2945/// | # | Operands      |
2946/// +---+---------------+
2947/// | 1 | Xmm, Mem, Imm |
2948/// | 2 | Xmm, Xmm, Imm |
2949/// +---+---------------+
2950/// ```
2951pub trait SsePshufhwEmitter<A, B, C> {
2952    fn sse_pshufhw(&mut self, op0: A, op1: B, op2: C);
2953}
2954
2955impl<'a> SsePshufhwEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2956    fn sse_pshufhw(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2957        self.emit(SSE_PSHUFHWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2958    }
2959}
2960
2961impl<'a> SsePshufhwEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2962    fn sse_pshufhw(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2963        self.emit(SSE_PSHUFHWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2964    }
2965}
2966
2967/// `SSE_PSHUFLW` (PSHUFLW). 
2968/// Copies words from the low quadword of a 128-bit lane of the source operand and inserts them in the low quadword of the destination operand at word locations (of the respective lane) selected with the immediate operand. The 256-bit operation is similar to the in-lane operation used by the 256-bit VPSHUFD instruction, which is illustrated in Figure 4-16. For 128-bit operation, only the low 128-bit lane is operative. Each 2-bit field in the immediate operand selects the contents of one word location in the low quadword of the destination operand. The binary encodings of the immediate operand fields select words (0, 1, 2 or 3) from the low quadword of the source operand to be copied to the destination operand. The high quadword of the source operand is copied to the high quadword of the destination operand, for each 128-bit lane.
2969///
2970///
2971/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFLW.html).
2972///
2973/// Supported operand variants:
2974///
2975/// ```text
2976/// +---+---------------+
2977/// | # | Operands      |
2978/// +---+---------------+
2979/// | 1 | Xmm, Mem, Imm |
2980/// | 2 | Xmm, Xmm, Imm |
2981/// +---+---------------+
2982/// ```
2983pub trait SsePshuflwEmitter<A, B, C> {
2984    fn sse_pshuflw(&mut self, op0: A, op1: B, op2: C);
2985}
2986
2987impl<'a> SsePshuflwEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
2988    fn sse_pshuflw(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
2989        self.emit(SSE_PSHUFLWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2990    }
2991}
2992
2993impl<'a> SsePshuflwEmitter<Xmm, Mem, Imm> for Assembler<'a> {
2994    fn sse_pshuflw(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
2995        self.emit(SSE_PSHUFLWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
2996    }
2997}
2998
2999/// `SSE_PSLLD` (PSLLD). 
3000/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the destination operand (first operand) to the left by the number of bits specified in the count operand (second operand). As the bits in the data elements are shifted left, the empty low-order bits are cleared (set to 0). If the value specified by the count operand is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
3001///
3002///
3003/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
3004///
3005/// Supported operand variants:
3006///
3007/// ```text
3008/// +---+----------+
3009/// | # | Operands |
3010/// +---+----------+
3011/// | 1 | Xmm, Imm |
3012/// | 2 | Xmm, Mem |
3013/// | 3 | Xmm, Xmm |
3014/// +---+----------+
3015/// ```
3016pub trait SsePslldEmitter<A, B> {
3017    fn sse_pslld(&mut self, op0: A, op1: B);
3018}
3019
3020impl<'a> SsePslldEmitter<Xmm, Imm> for Assembler<'a> {
3021    fn sse_pslld(&mut self, op0: Xmm, op1: Imm) {
3022        self.emit(SSE_PSLLDRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3023    }
3024}
3025
3026impl<'a> SsePslldEmitter<Xmm, Xmm> for Assembler<'a> {
3027    fn sse_pslld(&mut self, op0: Xmm, op1: Xmm) {
3028        self.emit(SSE_PSLLDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3029    }
3030}
3031
3032impl<'a> SsePslldEmitter<Xmm, Mem> for Assembler<'a> {
3033    fn sse_pslld(&mut self, op0: Xmm, op1: Mem) {
3034        self.emit(SSE_PSLLDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3035    }
3036}
3037
3038/// `SSE_PSLLDQ` (PSLLDQ). 
3039/// Shifts the destination operand (first operand) to the left by the number of bytes specified in the count operand (second operand). The empty low-order bytes are cleared (set to all 0s). If the value specified by the count operand is greater than 15, the destination operand is set to all 0s. The count operand is an 8-bit immediate.
3040///
3041///
3042/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSLLDQ.html).
3043///
3044/// Supported operand variants:
3045///
3046/// ```text
3047/// +---+----------+
3048/// | # | Operands |
3049/// +---+----------+
3050/// | 1 | Xmm, Imm |
3051/// +---+----------+
3052/// ```
3053pub trait SsePslldqEmitter<A, B> {
3054    fn sse_pslldq(&mut self, op0: A, op1: B);
3055}
3056
3057impl<'a> SsePslldqEmitter<Xmm, Imm> for Assembler<'a> {
3058    fn sse_pslldq(&mut self, op0: Xmm, op1: Imm) {
3059        self.emit(SSE_PSLLDQRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3060    }
3061}
3062
3063/// `SSE_PSLLQ` (PSLLQ). 
3064/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the destination operand (first operand) to the left by the number of bits specified in the count operand (second operand). As the bits in the data elements are shifted left, the empty low-order bits are cleared (set to 0). If the value specified by the count operand is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
3065///
3066///
3067/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
3068///
3069/// Supported operand variants:
3070///
3071/// ```text
3072/// +---+----------+
3073/// | # | Operands |
3074/// +---+----------+
3075/// | 1 | Xmm, Imm |
3076/// | 2 | Xmm, Mem |
3077/// | 3 | Xmm, Xmm |
3078/// +---+----------+
3079/// ```
3080pub trait SsePsllqEmitter<A, B> {
3081    fn sse_psllq(&mut self, op0: A, op1: B);
3082}
3083
3084impl<'a> SsePsllqEmitter<Xmm, Imm> for Assembler<'a> {
3085    fn sse_psllq(&mut self, op0: Xmm, op1: Imm) {
3086        self.emit(SSE_PSLLQRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3087    }
3088}
3089
3090impl<'a> SsePsllqEmitter<Xmm, Xmm> for Assembler<'a> {
3091    fn sse_psllq(&mut self, op0: Xmm, op1: Xmm) {
3092        self.emit(SSE_PSLLQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3093    }
3094}
3095
3096impl<'a> SsePsllqEmitter<Xmm, Mem> for Assembler<'a> {
3097    fn sse_psllq(&mut self, op0: Xmm, op1: Mem) {
3098        self.emit(SSE_PSLLQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3099    }
3100}
3101
3102/// `SSE_PSLLW` (PSLLW). 
3103/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the destination operand (first operand) to the left by the number of bits specified in the count operand (second operand). As the bits in the data elements are shifted left, the empty low-order bits are cleared (set to 0). If the value specified by the count operand is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
3104///
3105///
3106/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
3107///
3108/// Supported operand variants:
3109///
3110/// ```text
3111/// +---+----------+
3112/// | # | Operands |
3113/// +---+----------+
3114/// | 1 | Xmm, Imm |
3115/// | 2 | Xmm, Mem |
3116/// | 3 | Xmm, Xmm |
3117/// +---+----------+
3118/// ```
3119pub trait SsePsllwEmitter<A, B> {
3120    fn sse_psllw(&mut self, op0: A, op1: B);
3121}
3122
3123impl<'a> SsePsllwEmitter<Xmm, Imm> for Assembler<'a> {
3124    fn sse_psllw(&mut self, op0: Xmm, op1: Imm) {
3125        self.emit(SSE_PSLLWRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3126    }
3127}
3128
3129impl<'a> SsePsllwEmitter<Xmm, Xmm> for Assembler<'a> {
3130    fn sse_psllw(&mut self, op0: Xmm, op1: Xmm) {
3131        self.emit(SSE_PSLLWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3132    }
3133}
3134
3135impl<'a> SsePsllwEmitter<Xmm, Mem> for Assembler<'a> {
3136    fn sse_psllw(&mut self, op0: Xmm, op1: Mem) {
3137        self.emit(SSE_PSLLWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3138    }
3139}
3140
3141/// `SSE_PSRAD` (PSRAD). 
3142/// Shifts the bits in the individual data elements (words, doublewords or quadwords) in the destination operand (first operand) to the right by the number of bits specified in the count operand (second operand). As the bits in the data elements are shifted right, the empty high-order bits are filled with the initial value of the sign bit of the data element. If the value specified by the count operand is greater than 15 (for words), 31 (for doublewords), or 63 (for quadwords), each destination data element is filled with the initial value of the sign bit of the element. (Figure 4-18 gives an example of shifting words in a 64-bit operand.)
3143///
3144///
3145/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRAW%3APSRAD%3APSRAQ.html).
3146///
3147/// Supported operand variants:
3148///
3149/// ```text
3150/// +---+----------+
3151/// | # | Operands |
3152/// +---+----------+
3153/// | 1 | Xmm, Imm |
3154/// | 2 | Xmm, Mem |
3155/// | 3 | Xmm, Xmm |
3156/// +---+----------+
3157/// ```
3158pub trait SsePsradEmitter<A, B> {
3159    fn sse_psrad(&mut self, op0: A, op1: B);
3160}
3161
3162impl<'a> SsePsradEmitter<Xmm, Imm> for Assembler<'a> {
3163    fn sse_psrad(&mut self, op0: Xmm, op1: Imm) {
3164        self.emit(SSE_PSRADRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3165    }
3166}
3167
3168impl<'a> SsePsradEmitter<Xmm, Xmm> for Assembler<'a> {
3169    fn sse_psrad(&mut self, op0: Xmm, op1: Xmm) {
3170        self.emit(SSE_PSRADRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3171    }
3172}
3173
3174impl<'a> SsePsradEmitter<Xmm, Mem> for Assembler<'a> {
3175    fn sse_psrad(&mut self, op0: Xmm, op1: Mem) {
3176        self.emit(SSE_PSRADRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3177    }
3178}
3179
/// `SSE_PSRAW` (PSRAW).
///
/// Shifts the bits in the individual data elements (words, doublewords or quadwords) in the
/// destination operand (first operand) to the right by the number of bits specified in the count
/// operand (second operand). As the bits in the data elements are shifted right, the empty
/// high-order bits are filled with the initial value of the sign bit of the data element. If the
/// value specified by the count operand is greater than 15 (for words), 31 (for doublewords), or
/// 63 (for quadwords), each destination data element is filled with the initial value of the sign
/// bit of the element. (Figure 4-18 in the Intel manual gives an example of shifting words in a
/// 64-bit operand.)
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRAW%3APSRAD%3APSRAQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Imm |
/// | 2 | Xmm, Mem |
/// | 3 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsrawEmitter<A, B> {
    /// Emit one `PSRAW` instruction: `op0` is the destination operand, `op1` the count operand.
    fn sse_psraw(&mut self, op0: A, op1: B);
}
3200
3201impl<'a> SsePsrawEmitter<Xmm, Imm> for Assembler<'a> {
3202    fn sse_psraw(&mut self, op0: Xmm, op1: Imm) {
3203        self.emit(SSE_PSRAWRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3204    }
3205}
3206
3207impl<'a> SsePsrawEmitter<Xmm, Xmm> for Assembler<'a> {
3208    fn sse_psraw(&mut self, op0: Xmm, op1: Xmm) {
3209        self.emit(SSE_PSRAWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3210    }
3211}
3212
3213impl<'a> SsePsrawEmitter<Xmm, Mem> for Assembler<'a> {
3214    fn sse_psraw(&mut self, op0: Xmm, op1: Mem) {
3215        self.emit(SSE_PSRAWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3216    }
3217}
3218
/// `SSE_PSRLD` (PSRLD).
///
/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the
/// destination operand (first operand) to the right by the number of bits specified in the count
/// operand (second operand). As the bits in the data elements are shifted right, the empty
/// high-order bits are cleared (set to 0). If the value specified by the count operand is greater
/// than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand
/// is set to all 0s. Figure 4-19 in the Intel manual gives an example of shifting words in a
/// 64-bit operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Imm |
/// | 2 | Xmm, Mem |
/// | 3 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsrldEmitter<A, B> {
    /// Emit one `PSRLD` instruction: `op0` is the destination operand, `op1` the count operand.
    fn sse_psrld(&mut self, op0: A, op1: B);
}
3239
3240impl<'a> SsePsrldEmitter<Xmm, Imm> for Assembler<'a> {
3241    fn sse_psrld(&mut self, op0: Xmm, op1: Imm) {
3242        self.emit(SSE_PSRLDRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3243    }
3244}
3245
3246impl<'a> SsePsrldEmitter<Xmm, Xmm> for Assembler<'a> {
3247    fn sse_psrld(&mut self, op0: Xmm, op1: Xmm) {
3248        self.emit(SSE_PSRLDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3249    }
3250}
3251
3252impl<'a> SsePsrldEmitter<Xmm, Mem> for Assembler<'a> {
3253    fn sse_psrld(&mut self, op0: Xmm, op1: Mem) {
3254        self.emit(SSE_PSRLDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3255    }
3256}
3257
/// `SSE_PSRLDQ` (PSRLDQ).
///
/// Shifts the destination operand (first operand) to the right by the number of bytes specified
/// in the count operand (second operand). The empty high-order bytes are cleared (set to all 0s).
/// If the value specified by the count operand is greater than 15, the destination operand is set
/// to all 0s. The count operand is an 8-bit immediate.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRLDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Imm |
/// +---+----------+
/// ```
pub trait SsePsrldqEmitter<A, B> {
    /// Emit one `PSRLDQ` instruction: `op0` is the destination operand, `op1` the byte count.
    fn sse_psrldq(&mut self, op0: A, op1: B);
}
3276
3277impl<'a> SsePsrldqEmitter<Xmm, Imm> for Assembler<'a> {
3278    fn sse_psrldq(&mut self, op0: Xmm, op1: Imm) {
3279        self.emit(SSE_PSRLDQRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3280    }
3281}
3282
/// `SSE_PSRLQ` (PSRLQ).
///
/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the
/// destination operand (first operand) to the right by the number of bits specified in the count
/// operand (second operand). As the bits in the data elements are shifted right, the empty
/// high-order bits are cleared (set to 0). If the value specified by the count operand is greater
/// than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand
/// is set to all 0s. Figure 4-19 in the Intel manual gives an example of shifting words in a
/// 64-bit operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Imm |
/// | 2 | Xmm, Mem |
/// | 3 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsrlqEmitter<A, B> {
    /// Emit one `PSRLQ` instruction: `op0` is the destination operand, `op1` the count operand.
    fn sse_psrlq(&mut self, op0: A, op1: B);
}
3303
3304impl<'a> SsePsrlqEmitter<Xmm, Imm> for Assembler<'a> {
3305    fn sse_psrlq(&mut self, op0: Xmm, op1: Imm) {
3306        self.emit(SSE_PSRLQRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3307    }
3308}
3309
3310impl<'a> SsePsrlqEmitter<Xmm, Xmm> for Assembler<'a> {
3311    fn sse_psrlq(&mut self, op0: Xmm, op1: Xmm) {
3312        self.emit(SSE_PSRLQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3313    }
3314}
3315
3316impl<'a> SsePsrlqEmitter<Xmm, Mem> for Assembler<'a> {
3317    fn sse_psrlq(&mut self, op0: Xmm, op1: Mem) {
3318        self.emit(SSE_PSRLQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3319    }
3320}
3321
/// `SSE_PSRLW` (PSRLW).
///
/// Shifts the bits in the individual data elements (words, doublewords, or quadword) in the
/// destination operand (first operand) to the right by the number of bits specified in the count
/// operand (second operand). As the bits in the data elements are shifted right, the empty
/// high-order bits are cleared (set to 0). If the value specified by the count operand is greater
/// than 15 (for words), 31 (for doublewords), or 63 (for a quadword), then the destination operand
/// is set to all 0s. Figure 4-19 in the Intel manual gives an example of shifting words in a
/// 64-bit operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Imm |
/// | 2 | Xmm, Mem |
/// | 3 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsrlwEmitter<A, B> {
    /// Emit one `PSRLW` instruction: `op0` is the destination operand, `op1` the count operand.
    fn sse_psrlw(&mut self, op0: A, op1: B);
}
3342
3343impl<'a> SsePsrlwEmitter<Xmm, Imm> for Assembler<'a> {
3344    fn sse_psrlw(&mut self, op0: Xmm, op1: Imm) {
3345        self.emit(SSE_PSRLWRI, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3346    }
3347}
3348
3349impl<'a> SsePsrlwEmitter<Xmm, Xmm> for Assembler<'a> {
3350    fn sse_psrlw(&mut self, op0: Xmm, op1: Xmm) {
3351        self.emit(SSE_PSRLWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3352    }
3353}
3354
3355impl<'a> SsePsrlwEmitter<Xmm, Mem> for Assembler<'a> {
3356    fn sse_psrlw(&mut self, op0: Xmm, op1: Mem) {
3357        self.emit(SSE_PSRLWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3358    }
3359}
3360
/// `SSE_PSUBB` (PSUBB).
///
/// Performs a SIMD subtract of the packed integers of the source operand (second operand) from
/// the packed integers of the destination operand (first operand), and stores the packed integer
/// results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures
/// Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is
/// handled with wraparound, as described in the manual page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubbEmitter<A, B> {
    /// Emit one `PSUBB` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubb(&mut self, op0: A, op1: B);
}
3380
3381impl<'a> SsePsubbEmitter<Xmm, Xmm> for Assembler<'a> {
3382    fn sse_psubb(&mut self, op0: Xmm, op1: Xmm) {
3383        self.emit(SSE_PSUBBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3384    }
3385}
3386
3387impl<'a> SsePsubbEmitter<Xmm, Mem> for Assembler<'a> {
3388    fn sse_psubb(&mut self, op0: Xmm, op1: Mem) {
3389        self.emit(SSE_PSUBBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3390    }
3391}
3392
/// `SSE_PSUBD` (PSUBD).
///
/// Performs a SIMD subtract of the packed integers of the source operand (second operand) from
/// the packed integers of the destination operand (first operand), and stores the packed integer
/// results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures
/// Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is
/// handled with wraparound, as described in the manual page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubdEmitter<A, B> {
    /// Emit one `PSUBD` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubd(&mut self, op0: A, op1: B);
}
3412
3413impl<'a> SsePsubdEmitter<Xmm, Xmm> for Assembler<'a> {
3414    fn sse_psubd(&mut self, op0: Xmm, op1: Xmm) {
3415        self.emit(SSE_PSUBDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3416    }
3417}
3418
3419impl<'a> SsePsubdEmitter<Xmm, Mem> for Assembler<'a> {
3420    fn sse_psubd(&mut self, op0: Xmm, op1: Mem) {
3421        self.emit(SSE_PSUBDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3422    }
3423}
3424
/// `SSE_PSUBQ` (PSUBQ).
///
/// Subtracts the second operand (source operand) from the first operand (destination operand)
/// and stores the result in the destination operand. When packed quadword operands are used, a
/// SIMD subtract is performed. When a quadword result is too large to be represented in 64 bits
/// (overflow), the result is wrapped around and the low 64 bits are written to the destination
/// element (that is, the carry is ignored).
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubqEmitter<A, B> {
    /// Emit one `PSUBQ` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubq(&mut self, op0: A, op1: B);
}
3444
3445impl<'a> SsePsubqEmitter<Xmm, Xmm> for Assembler<'a> {
3446    fn sse_psubq(&mut self, op0: Xmm, op1: Xmm) {
3447        self.emit(SSE_PSUBQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3448    }
3449}
3450
3451impl<'a> SsePsubqEmitter<Xmm, Mem> for Assembler<'a> {
3452    fn sse_psubq(&mut self, op0: Xmm, op1: Mem) {
3453        self.emit(SSE_PSUBQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3454    }
3455}
3456
/// `SSE_PSUBSB` (PSUBSB).
///
/// Performs a SIMD subtract of the packed signed integers of the source operand (second operand)
/// from the packed signed integers of the destination operand (first operand), and stores the
/// packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32
/// Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation.
/// Overflow is handled with signed saturation, as described in the manual page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBSB%3APSUBSW.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubsbEmitter<A, B> {
    /// Emit one `PSUBSB` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubsb(&mut self, op0: A, op1: B);
}
3476
3477impl<'a> SsePsubsbEmitter<Xmm, Xmm> for Assembler<'a> {
3478    fn sse_psubsb(&mut self, op0: Xmm, op1: Xmm) {
3479        self.emit(SSE_PSUBSBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3480    }
3481}
3482
3483impl<'a> SsePsubsbEmitter<Xmm, Mem> for Assembler<'a> {
3484    fn sse_psubsb(&mut self, op0: Xmm, op1: Mem) {
3485        self.emit(SSE_PSUBSBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3486    }
3487}
3488
/// `SSE_PSUBSW` (PSUBSW).
///
/// Performs a SIMD subtract of the packed signed integers of the source operand (second operand)
/// from the packed signed integers of the destination operand (first operand), and stores the
/// packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32
/// Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation.
/// Overflow is handled with signed saturation, as described in the manual page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBSB%3APSUBSW.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubswEmitter<A, B> {
    /// Emit one `PSUBSW` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubsw(&mut self, op0: A, op1: B);
}
3508
3509impl<'a> SsePsubswEmitter<Xmm, Xmm> for Assembler<'a> {
3510    fn sse_psubsw(&mut self, op0: Xmm, op1: Xmm) {
3511        self.emit(SSE_PSUBSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3512    }
3513}
3514
3515impl<'a> SsePsubswEmitter<Xmm, Mem> for Assembler<'a> {
3516    fn sse_psubsw(&mut self, op0: Xmm, op1: Mem) {
3517        self.emit(SSE_PSUBSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3518    }
3519}
3520
/// `SSE_PSUBUSB` (PSUBUSB).
///
/// Performs a SIMD subtract of the packed unsigned integers of the source operand (second
/// operand) from the packed unsigned integers of the destination operand (first operand), and
/// stores the packed unsigned integer results in the destination operand. See Figure 9-4 in the
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of
/// a SIMD operation. Overflow is handled with unsigned saturation, as described in the manual
/// page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBUSB%3APSUBUSW.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubusbEmitter<A, B> {
    /// Emit one `PSUBUSB` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubusb(&mut self, op0: A, op1: B);
}
3540
3541impl<'a> SsePsubusbEmitter<Xmm, Xmm> for Assembler<'a> {
3542    fn sse_psubusb(&mut self, op0: Xmm, op1: Xmm) {
3543        self.emit(SSE_PSUBUSBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3544    }
3545}
3546
3547impl<'a> SsePsubusbEmitter<Xmm, Mem> for Assembler<'a> {
3548    fn sse_psubusb(&mut self, op0: Xmm, op1: Mem) {
3549        self.emit(SSE_PSUBUSBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3550    }
3551}
3552
/// `SSE_PSUBUSW` (PSUBUSW).
///
/// Performs a SIMD subtract of the packed unsigned integers of the source operand (second
/// operand) from the packed unsigned integers of the destination operand (first operand), and
/// stores the packed unsigned integer results in the destination operand. See Figure 9-4 in the
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of
/// a SIMD operation. Overflow is handled with unsigned saturation, as described in the manual
/// page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBUSB%3APSUBUSW.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubuswEmitter<A, B> {
    /// Emit one `PSUBUSW` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubusw(&mut self, op0: A, op1: B);
}
3572
3573impl<'a> SsePsubuswEmitter<Xmm, Xmm> for Assembler<'a> {
3574    fn sse_psubusw(&mut self, op0: Xmm, op1: Xmm) {
3575        self.emit(SSE_PSUBUSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3576    }
3577}
3578
3579impl<'a> SsePsubuswEmitter<Xmm, Mem> for Assembler<'a> {
3580    fn sse_psubusw(&mut self, op0: Xmm, op1: Mem) {
3581        self.emit(SSE_PSUBUSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3582    }
3583}
3584
/// `SSE_PSUBW` (PSUBW).
///
/// Performs a SIMD subtract of the packed integers of the source operand (second operand) from
/// the packed integers of the destination operand (first operand), and stores the packed integer
/// results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures
/// Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is
/// handled with wraparound, as described in the manual page linked below.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePsubwEmitter<A, B> {
    /// Emit one `PSUBW` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_psubw(&mut self, op0: A, op1: B);
}
3604
3605impl<'a> SsePsubwEmitter<Xmm, Xmm> for Assembler<'a> {
3606    fn sse_psubw(&mut self, op0: Xmm, op1: Xmm) {
3607        self.emit(SSE_PSUBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3608    }
3609}
3610
3611impl<'a> SsePsubwEmitter<Xmm, Mem> for Assembler<'a> {
3612    fn sse_psubw(&mut self, op0: Xmm, op1: Mem) {
3613        self.emit(SSE_PSUBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3614    }
3615}
3616
/// `SSE_PUNPCKHBW` (PUNPCKHBW).
///
/// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. Figure 4-20 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands. The low-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpckhbwEmitter<A, B> {
    /// Emit one `PUNPCKHBW` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpckhbw(&mut self, op0: A, op1: B);
}
3636
3637impl<'a> SsePunpckhbwEmitter<Xmm, Xmm> for Assembler<'a> {
3638    fn sse_punpckhbw(&mut self, op0: Xmm, op1: Xmm) {
3639        self.emit(SSE_PUNPCKHBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3640    }
3641}
3642
3643impl<'a> SsePunpckhbwEmitter<Xmm, Mem> for Assembler<'a> {
3644    fn sse_punpckhbw(&mut self, op0: Xmm, op1: Mem) {
3645        self.emit(SSE_PUNPCKHBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3646    }
3647}
3648
/// `SSE_PUNPCKHDQ` (PUNPCKHDQ).
///
/// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. Figure 4-20 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands. The low-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpckhdqEmitter<A, B> {
    /// Emit one `PUNPCKHDQ` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpckhdq(&mut self, op0: A, op1: B);
}
3668
3669impl<'a> SsePunpckhdqEmitter<Xmm, Xmm> for Assembler<'a> {
3670    fn sse_punpckhdq(&mut self, op0: Xmm, op1: Xmm) {
3671        self.emit(SSE_PUNPCKHDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3672    }
3673}
3674
3675impl<'a> SsePunpckhdqEmitter<Xmm, Mem> for Assembler<'a> {
3676    fn sse_punpckhdq(&mut self, op0: Xmm, op1: Mem) {
3677        self.emit(SSE_PUNPCKHDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3678    }
3679}
3680
/// `SSE_PUNPCKHQDQ` (PUNPCKHQDQ).
///
/// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. Figure 4-20 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands. The low-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpckhqdqEmitter<A, B> {
    /// Emit one `PUNPCKHQDQ` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpckhqdq(&mut self, op0: A, op1: B);
}
3700
3701impl<'a> SsePunpckhqdqEmitter<Xmm, Xmm> for Assembler<'a> {
3702    fn sse_punpckhqdq(&mut self, op0: Xmm, op1: Xmm) {
3703        self.emit(SSE_PUNPCKHQDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3704    }
3705}
3706
3707impl<'a> SsePunpckhqdqEmitter<Xmm, Mem> for Assembler<'a> {
3708    fn sse_punpckhqdq(&mut self, op0: Xmm, op1: Mem) {
3709        self.emit(SSE_PUNPCKHQDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3710    }
3711}
3712
/// `SSE_PUNPCKHWD` (PUNPCKHWD).
///
/// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. Figure 4-20 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands. The low-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpckhwdEmitter<A, B> {
    /// Emit one `PUNPCKHWD` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpckhwd(&mut self, op0: A, op1: B);
}
3732
3733impl<'a> SsePunpckhwdEmitter<Xmm, Xmm> for Assembler<'a> {
3734    fn sse_punpckhwd(&mut self, op0: Xmm, op1: Xmm) {
3735        self.emit(SSE_PUNPCKHWDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3736    }
3737}
3738
3739impl<'a> SsePunpckhwdEmitter<Xmm, Mem> for Assembler<'a> {
3740    fn sse_punpckhwd(&mut self, op0: Xmm, op1: Mem) {
3741        self.emit(SSE_PUNPCKHWDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3742    }
3743}
3744
/// `SSE_PUNPCKLBW` (PUNPCKLBW).
///
/// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. (Figure 4-22 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands.) The high-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpcklbwEmitter<A, B> {
    /// Emit one `PUNPCKLBW` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpcklbw(&mut self, op0: A, op1: B);
}
3764
3765impl<'a> SsePunpcklbwEmitter<Xmm, Xmm> for Assembler<'a> {
3766    fn sse_punpcklbw(&mut self, op0: Xmm, op1: Xmm) {
3767        self.emit(SSE_PUNPCKLBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3768    }
3769}
3770
3771impl<'a> SsePunpcklbwEmitter<Xmm, Mem> for Assembler<'a> {
3772    fn sse_punpcklbw(&mut self, op0: Xmm, op1: Mem) {
3773        self.emit(SSE_PUNPCKLBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3774    }
3775}
3776
/// `SSE_PUNPCKLDQ` (PUNPCKLDQ).
///
/// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. (Figure 4-22 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands.) The high-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpckldqEmitter<A, B> {
    /// Emit one `PUNPCKLDQ` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpckldq(&mut self, op0: A, op1: B);
}
3796
3797impl<'a> SsePunpckldqEmitter<Xmm, Xmm> for Assembler<'a> {
3798    fn sse_punpckldq(&mut self, op0: Xmm, op1: Xmm) {
3799        self.emit(SSE_PUNPCKLDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3800    }
3801}
3802
3803impl<'a> SsePunpckldqEmitter<Xmm, Mem> for Assembler<'a> {
3804    fn sse_punpckldq(&mut self, op0: Xmm, op1: Mem) {
3805        self.emit(SSE_PUNPCKLDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3806    }
3807}
3808
/// `SSE_PUNPCKLQDQ` (PUNPCKLQDQ).
///
/// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. (Figure 4-22 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands.) The high-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpcklqdqEmitter<A, B> {
    /// Emit one `PUNPCKLQDQ` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpcklqdq(&mut self, op0: A, op1: B);
}
3828
3829impl<'a> SsePunpcklqdqEmitter<Xmm, Xmm> for Assembler<'a> {
3830    fn sse_punpcklqdq(&mut self, op0: Xmm, op1: Xmm) {
3831        self.emit(SSE_PUNPCKLQDQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3832    }
3833}
3834
3835impl<'a> SsePunpcklqdqEmitter<Xmm, Mem> for Assembler<'a> {
3836    fn sse_punpcklqdq(&mut self, op0: Xmm, op1: Mem) {
3837        self.emit(SSE_PUNPCKLQDQRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3838    }
3839}
3840
/// `SSE_PUNPCKLWD` (PUNPCKLWD).
///
/// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and
/// quadwords) of the destination operand (first operand) and source operand (second operand) into
/// the destination operand. (Figure 4-22 in the Intel manual shows the unpack operation for bytes
/// in 64-bit operands.) The high-order data elements are ignored.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePunpcklwdEmitter<A, B> {
    /// Emit one `PUNPCKLWD` instruction: `op0` is the destination operand, `op1` the source
    /// operand.
    fn sse_punpcklwd(&mut self, op0: A, op1: B);
}
3860
3861impl<'a> SsePunpcklwdEmitter<Xmm, Xmm> for Assembler<'a> {
3862    fn sse_punpcklwd(&mut self, op0: Xmm, op1: Xmm) {
3863        self.emit(SSE_PUNPCKLWDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3864    }
3865}
3866
3867impl<'a> SsePunpcklwdEmitter<Xmm, Mem> for Assembler<'a> {
3868    fn sse_punpcklwd(&mut self, op0: Xmm, op1: Mem) {
3869        self.emit(SSE_PUNPCKLWDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3870    }
3871}
3872
/// `SSE_PXOR` (PXOR).
///
/// Performs a bitwise logical exclusive-OR (XOR) operation on the source operand (second
/// operand) and the destination operand (first operand) and stores the result in the destination
/// operand. Each bit of the result is 1 if the corresponding bits of the two operands are
/// different; each bit is 0 if the corresponding bits of the operands are the same.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PXOR.html).
///
/// `A` is the type of the first operand and `B` the type of the second.
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SsePxorEmitter<A, B> {
    /// Emit one `PXOR` instruction: `op0` is the destination operand, `op1` the source operand.
    fn sse_pxor(&mut self, op0: A, op1: B);
}
3892
3893impl<'a> SsePxorEmitter<Xmm, Xmm> for Assembler<'a> {
3894    fn sse_pxor(&mut self, op0: Xmm, op1: Xmm) {
3895        self.emit(SSE_PXORRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3896    }
3897}
3898
3899impl<'a> SsePxorEmitter<Xmm, Mem> for Assembler<'a> {
3900    fn sse_pxor(&mut self, op0: Xmm, op1: Mem) {
3901        self.emit(SSE_PXORRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3902    }
3903}
3904
/// `SSE_SHUFPD` (SHUFPD).
///
/// Selects a double precision floating-point value of an input pair using a bit control and
/// moves it to a designated element of the destination operand. The low-to-high order of double
/// precision element of the destination operand is interleaved between the first source operand
/// and the second source operand at the granularity of input pair of 128 bits. Each bit in the
/// imm8 byte, starting from bit 0, is the select control of the corresponding element of the
/// destination to receive the shuffled result of an input pair.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SHUFPD.html).
///
/// `A`, `B` and `C` are the types of the first, second and third operand.
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseShufpdEmitter<A, B, C> {
    /// Emit one `SHUFPD` instruction from its three operands, given in the order shown in the
    /// operand table (the immediate comes last).
    fn sse_shufpd(&mut self, op0: A, op1: B, op2: C);
}
3924
3925impl<'a> SseShufpdEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
3926    fn sse_shufpd(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
3927        self.emit(SSE_SHUFPDRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3928    }
3929}
3930
3931impl<'a> SseShufpdEmitter<Xmm, Mem, Imm> for Assembler<'a> {
3932    fn sse_shufpd(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
3933        self.emit(SSE_SHUFPDRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
3934    }
3935}
3936
/// Emitter interface for `SSE_SQRTPD` (SQRTPD).
///
/// SIMD computation of the square roots of the two, four or eight packed double
/// precision floating-point values in the source operand (the second operand); the
/// packed double precision results are stored in the destination operand (the first
/// operand).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SQRTPD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtpdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_sqrtpd(&mut self, op0: A, op1: B);
}
3956
3957impl<'a> SseSqrtpdEmitter<Xmm, Xmm> for Assembler<'a> {
3958    fn sse_sqrtpd(&mut self, op0: Xmm, op1: Xmm) {
3959        self.emit(SSE_SQRTPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3960    }
3961}
3962
3963impl<'a> SseSqrtpdEmitter<Xmm, Mem> for Assembler<'a> {
3964    fn sse_sqrtpd(&mut self, op0: Xmm, op1: Mem) {
3965        self.emit(SSE_SQRTPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3966    }
3967}
3968
/// Emitter interface for `SSE_SQRTSD` (SQRTSD).
///
/// Computes the square root of the low double precision floating-point value in the
/// second source operand and stores the double precision result in the destination
/// operand. The second source operand can be an XMM register or a 64-bit memory
/// location; the first source and destination operands are XMM registers.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SQRTSD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtsdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_sqrtsd(&mut self, op0: A, op1: B);
}
3988
3989impl<'a> SseSqrtsdEmitter<Xmm, Xmm> for Assembler<'a> {
3990    fn sse_sqrtsd(&mut self, op0: Xmm, op1: Xmm) {
3991        self.emit(SSE_SQRTSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3992    }
3993}
3994
3995impl<'a> SseSqrtsdEmitter<Xmm, Mem> for Assembler<'a> {
3996    fn sse_sqrtsd(&mut self, op0: Xmm, op1: Mem) {
3997        self.emit(SSE_SQRTSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
3998    }
3999}
4000
/// Emitter interface for `SSE_SUBPD` (SUBPD).
///
/// SIMD subtract of the two, four or eight packed double precision floating-point
/// values of the second source operand from the first source operand; the packed
/// double precision results are stored in the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SUBPD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubpdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_subpd(&mut self, op0: A, op1: B);
}
4020
4021impl<'a> SseSubpdEmitter<Xmm, Xmm> for Assembler<'a> {
4022    fn sse_subpd(&mut self, op0: Xmm, op1: Xmm) {
4023        self.emit(SSE_SUBPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4024    }
4025}
4026
4027impl<'a> SseSubpdEmitter<Xmm, Mem> for Assembler<'a> {
4028    fn sse_subpd(&mut self, op0: Xmm, op1: Mem) {
4029        self.emit(SSE_SUBPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4030    }
4031}
4032
/// Emitter interface for `SSE_SUBSD` (SUBSD).
///
/// Subtracts the low double precision floating-point value in the second source
/// operand from the first source operand and stores the double precision result in
/// the low quadword of the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SUBSD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubsdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_subsd(&mut self, op0: A, op1: B);
}
4052
4053impl<'a> SseSubsdEmitter<Xmm, Xmm> for Assembler<'a> {
4054    fn sse_subsd(&mut self, op0: Xmm, op1: Xmm) {
4055        self.emit(SSE_SUBSDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4056    }
4057}
4058
4059impl<'a> SseSubsdEmitter<Xmm, Mem> for Assembler<'a> {
4060    fn sse_subsd(&mut self, op0: Xmm, op1: Mem) {
4061        self.emit(SSE_SUBSDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4062    }
4063}
4064
/// Emitter interface for `SSE_UCOMISD` (UCOMISD).
///
/// Unordered compare of the double precision floating-point values in the low
/// quadwords of operand 1 (first operand) and operand 2 (second operand). The ZF, PF
/// and CF flags in the EFLAGS register are set according to the result (unordered,
/// greater than, less than, or equal); the OF, SF and AF flags are set to 0. The
/// unordered result is returned if either source operand is a NaN (QNaN or SNaN).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UCOMISD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUcomisdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_ucomisd(&mut self, op0: A, op1: B);
}
4084
4085impl<'a> SseUcomisdEmitter<Xmm, Xmm> for Assembler<'a> {
4086    fn sse_ucomisd(&mut self, op0: Xmm, op1: Xmm) {
4087        self.emit(SSE_UCOMISDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4088    }
4089}
4090
4091impl<'a> SseUcomisdEmitter<Xmm, Mem> for Assembler<'a> {
4092    fn sse_ucomisd(&mut self, op0: Xmm, op1: Mem) {
4093        self.emit(SSE_UCOMISDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4094    }
4095}
4096
/// Emitter interface for `SSE_UNPCKHPD` (UNPCKHPD).
///
/// Interleaved unpack of the high double precision floating-point values from the
/// first source operand and the second source operand. See Figure 4-15 in the Intel
/// 64 and IA-32 Architectures Software Developer's Manual, Volume 2B.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpckhpdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_unpckhpd(&mut self, op0: A, op1: B);
}
4116
4117impl<'a> SseUnpckhpdEmitter<Xmm, Xmm> for Assembler<'a> {
4118    fn sse_unpckhpd(&mut self, op0: Xmm, op1: Xmm) {
4119        self.emit(SSE_UNPCKHPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4120    }
4121}
4122
4123impl<'a> SseUnpckhpdEmitter<Xmm, Mem> for Assembler<'a> {
4124    fn sse_unpckhpd(&mut self, op0: Xmm, op1: Mem) {
4125        self.emit(SSE_UNPCKHPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4126    }
4127}
4128
/// Emitter interface for `SSE_UNPCKLPD` (UNPCKLPD).
///
/// Interleaved unpack of the low double precision floating-point values from the
/// first source operand and the second source operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpcklpdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_unpcklpd(&mut self, op0: A, op1: B);
}
4148
4149impl<'a> SseUnpcklpdEmitter<Xmm, Xmm> for Assembler<'a> {
4150    fn sse_unpcklpd(&mut self, op0: Xmm, op1: Xmm) {
4151        self.emit(SSE_UNPCKLPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4152    }
4153}
4154
4155impl<'a> SseUnpcklpdEmitter<Xmm, Mem> for Assembler<'a> {
4156    fn sse_unpcklpd(&mut self, op0: Xmm, op1: Mem) {
4157        self.emit(SSE_UNPCKLPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4158    }
4159}
4160
/// Emitter interface for `SSE_XORPD` (XORPD).
///
/// Bitwise logical XOR of the two, four or eight packed double precision
/// floating-point values from the first source operand and the second source operand;
/// the result is stored in the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/XORPD.html).
///
/// Operand combinations implemented for `Assembler`:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseXorpdEmitter<A, B> {
    /// Encodes the instruction with `op0` as first and `op1` as second operand.
    fn sse_xorpd(&mut self, op0: A, op1: B);
}
4180
4181impl<'a> SseXorpdEmitter<Xmm, Xmm> for Assembler<'a> {
4182    fn sse_xorpd(&mut self, op0: Xmm, op1: Xmm) {
4183        self.emit(SSE_XORPDRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4184    }
4185}
4186
4187impl<'a> SseXorpdEmitter<Xmm, Mem> for Assembler<'a> {
4188    fn sse_xorpd(&mut self, op0: Xmm, op1: Mem) {
4189        self.emit(SSE_XORPDRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
4190    }
4191}
4192
4193
4194impl<'a> Assembler<'a> {
4195    /// `LFENCE` (LFENCE). 
4196    /// Performs a serializing operation on all load-from-memory instructions that were issued prior the LFENCE instruction. Specifically, LFENCE does not execute until all prior instructions have completed locally, and no later instruction begins execution until LFENCE completes. In particular, an instruction that loads from memory and that precedes an LFENCE receives data from memory prior to completion of the LFENCE. (An LFENCE that follows an instruction that stores to memory might complete before the data being stored have become globally visible.) Instructions following an LFENCE may be fetched from memory before the LFENCE, but they will not execute (even speculatively) until the LFENCE completes.
4197    ///
4198    ///
4199    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LFENCE.html).
4200    ///
4201    /// Supported operand variants:
4202    ///
4203    /// ```text
4204    /// +---+----------+
4205    /// | # | Operands |
4206    /// +---+----------+
4207    /// | 1 | (none)   |
4208    /// +---+----------+
4209    /// ```
4210    #[inline]
4211    pub fn lfence(&mut self)
4212    where Assembler<'a>: LfenceEmitter {
4213        <Self as LfenceEmitter>::lfence(self);
4214    }
4215    /// `MFENCE`.
4216    ///
4217    /// Supported operand variants:
4218    ///
4219    /// ```text
4220    /// +---+----------+
4221    /// | # | Operands |
4222    /// +---+----------+
4223    /// | 1 | (none)   |
4224    /// +---+----------+
4225    /// ```
4226    #[inline]
4227    pub fn mfence(&mut self)
4228    where Assembler<'a>: MfenceEmitter {
4229        <Self as MfenceEmitter>::mfence(self);
4230    }
4231    /// `MMX_CVTPD2PI` (CVTPD2PI). 
4232    /// Converts two packed double precision floating-point values in the source operand (second operand) to two packed signed doubleword integers in the destination operand (first operand).
4233    ///
4234    ///
4235    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPD2PI.html).
4236    ///
4237    /// Supported operand variants:
4238    ///
4239    /// ```text
4240    /// +---+----------+
4241    /// | # | Operands |
4242    /// +---+----------+
4243    /// | 1 | Mm, Mem  |
4244    /// | 2 | Mm, Xmm  |
4245    /// +---+----------+
4246    /// ```
4247    #[inline]
4248    pub fn mmx_cvtpd2pi<A, B>(&mut self, op0: A, op1: B)
4249    where Assembler<'a>: MmxCvtpd2piEmitter<A, B> {
4250        <Self as MmxCvtpd2piEmitter<A, B>>::mmx_cvtpd2pi(self, op0, op1);
4251    }
4252    /// `MMX_CVTPI2PD` (CVTPI2PD). 
4253    /// Converts two packed signed doubleword integers in the source operand (second operand) to two packed double precision floating-point values in the destination operand (first operand).
4254    ///
4255    ///
4256    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPI2PD.html).
4257    ///
4258    /// Supported operand variants:
4259    ///
4260    /// ```text
4261    /// +---+----------+
4262    /// | # | Operands |
4263    /// +---+----------+
4264    /// | 1 | Xmm, Mem |
4265    /// | 2 | Xmm, Mm  |
4266    /// +---+----------+
4267    /// ```
4268    #[inline]
4269    pub fn mmx_cvtpi2pd<A, B>(&mut self, op0: A, op1: B)
4270    where Assembler<'a>: MmxCvtpi2pdEmitter<A, B> {
4271        <Self as MmxCvtpi2pdEmitter<A, B>>::mmx_cvtpi2pd(self, op0, op1);
4272    }
4273    /// `MMX_CVTPI2PS` (CVTPI2PS). 
4274    /// Converts two packed signed doubleword integers in the source operand (second operand) to two packed single precision floating-point values in the destination operand (first operand).
4275    ///
4276    ///
4277    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPI2PS.html).
4278    ///
4279    /// Supported operand variants:
4280    ///
4281    /// ```text
4282    /// +---+----------+
4283    /// | # | Operands |
4284    /// +---+----------+
4285    /// | 1 | Xmm, Mem |
4286    /// | 2 | Xmm, Mm  |
4287    /// +---+----------+
4288    /// ```
4289    #[inline]
4290    pub fn mmx_cvtpi2ps<A, B>(&mut self, op0: A, op1: B)
4291    where Assembler<'a>: MmxCvtpi2psEmitter<A, B> {
4292        <Self as MmxCvtpi2psEmitter<A, B>>::mmx_cvtpi2ps(self, op0, op1);
4293    }
4294    /// `MMX_CVTPS2PI` (CVTPS2PI). 
4295    /// Converts two packed single precision floating-point values in the source operand (second operand) to two packed signed doubleword integers in the destination operand (first operand).
4296    ///
4297    ///
4298    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPS2PI.html).
4299    ///
4300    /// Supported operand variants:
4301    ///
4302    /// ```text
4303    /// +---+----------+
4304    /// | # | Operands |
4305    /// +---+----------+
4306    /// | 1 | Mm, Mem  |
4307    /// | 2 | Mm, Xmm  |
4308    /// +---+----------+
4309    /// ```
4310    #[inline]
4311    pub fn mmx_cvtps2pi<A, B>(&mut self, op0: A, op1: B)
4312    where Assembler<'a>: MmxCvtps2piEmitter<A, B> {
4313        <Self as MmxCvtps2piEmitter<A, B>>::mmx_cvtps2pi(self, op0, op1);
4314    }
4315    /// `MMX_CVTTPD2PI` (CVTTPD2PI). 
4316    /// Converts two packed double precision floating-point values in the source operand (second operand) to two packed signed doubleword integers in the destination operand (first operand). The source operand can be an XMM register or a 128-bit memory location. The destination operand is an MMX technology register.
4317    ///
4318    ///
4319    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTPD2PI.html).
4320    ///
4321    /// Supported operand variants:
4322    ///
4323    /// ```text
4324    /// +---+----------+
4325    /// | # | Operands |
4326    /// +---+----------+
4327    /// | 1 | Mm, Mem  |
4328    /// | 2 | Mm, Xmm  |
4329    /// +---+----------+
4330    /// ```
4331    #[inline]
4332    pub fn mmx_cvttpd2pi<A, B>(&mut self, op0: A, op1: B)
4333    where Assembler<'a>: MmxCvttpd2piEmitter<A, B> {
4334        <Self as MmxCvttpd2piEmitter<A, B>>::mmx_cvttpd2pi(self, op0, op1);
4335    }
4336    /// `MMX_CVTTPS2PI` (CVTTPS2PI). 
4337    /// Converts two packed single precision floating-point values in the source operand (second operand) to two packed signed doubleword integers in the destination operand (first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is an MMX technology register. When the source operand is an XMM register, the two single precision floating-point values are contained in the low quadword of the register.
4338    ///
4339    ///
4340    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTPS2PI.html).
4341    ///
4342    /// Supported operand variants:
4343    ///
4344    /// ```text
4345    /// +---+----------+
4346    /// | # | Operands |
4347    /// +---+----------+
4348    /// | 1 | Mm, Mem  |
4349    /// | 2 | Mm, Xmm  |
4350    /// +---+----------+
4351    /// ```
4352    #[inline]
4353    pub fn mmx_cvttps2pi<A, B>(&mut self, op0: A, op1: B)
4354    where Assembler<'a>: MmxCvttps2piEmitter<A, B> {
4355        <Self as MmxCvttps2piEmitter<A, B>>::mmx_cvttps2pi(self, op0, op1);
4356    }
4357    /// `MOVNTI` (MOVNTI). 
4358    /// Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to minimize cache pollution during the write to memory. The source operand is a general-purpose register. The destination operand is a 32-bit memory location.
4359    ///
4360    ///
4361    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTI.html).
4362    ///
4363    /// Supported operand variants:
4364    ///
4365    /// ```text
4366    /// +---+----------+
4367    /// | # | Operands |
4368    /// +---+----------+
4369    /// | 1 | Mem, Gpd |
4370    /// | 2 | Mem, Gpq |
4371    /// +---+----------+
4372    /// ```
4373    #[inline]
4374    pub fn movnti<A, B>(&mut self, op0: A, op1: B)
4375    where Assembler<'a>: MovntiEmitter<A, B> {
4376        <Self as MovntiEmitter<A, B>>::movnti(self, op0, op1);
4377    }
4378    /// `SSE_ADDPD` (ADDPD). 
4379    /// Adds two, four or eight packed double precision floating-point values from the first source operand to the second source operand, and stores the packed double precision floating-point result in the destination operand.
4380    ///
4381    ///
4382    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDPD.html).
4383    ///
4384    /// Supported operand variants:
4385    ///
4386    /// ```text
4387    /// +---+----------+
4388    /// | # | Operands |
4389    /// +---+----------+
4390    /// | 1 | Xmm, Mem |
4391    /// | 2 | Xmm, Xmm |
4392    /// +---+----------+
4393    /// ```
4394    #[inline]
4395    pub fn sse_addpd<A, B>(&mut self, op0: A, op1: B)
4396    where Assembler<'a>: SseAddpdEmitter<A, B> {
4397        <Self as SseAddpdEmitter<A, B>>::sse_addpd(self, op0, op1);
4398    }
4399    /// `SSE_ADDSD` (ADDSD). 
4400    /// Adds the low double precision floating-point values from the second source operand and the first source operand and stores the double precision floating-point result in the destination operand.
4401    ///
4402    ///
4403    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSD.html).
4404    ///
4405    /// Supported operand variants:
4406    ///
4407    /// ```text
4408    /// +---+----------+
4409    /// | # | Operands |
4410    /// +---+----------+
4411    /// | 1 | Xmm, Mem |
4412    /// | 2 | Xmm, Xmm |
4413    /// +---+----------+
4414    /// ```
4415    #[inline]
4416    pub fn sse_addsd<A, B>(&mut self, op0: A, op1: B)
4417    where Assembler<'a>: SseAddsdEmitter<A, B> {
4418        <Self as SseAddsdEmitter<A, B>>::sse_addsd(self, op0, op1);
4419    }
4420    /// `SSE_ANDNPD` (ANDNPD). 
4421    /// Performs a bitwise logical AND NOT of the two, four or eight packed double precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
4422    ///
4423    ///
4424    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDNPD.html).
4425    ///
4426    /// Supported operand variants:
4427    ///
4428    /// ```text
4429    /// +---+----------+
4430    /// | # | Operands |
4431    /// +---+----------+
4432    /// | 1 | Xmm, Mem |
4433    /// | 2 | Xmm, Xmm |
4434    /// +---+----------+
4435    /// ```
4436    #[inline]
4437    pub fn sse_andnpd<A, B>(&mut self, op0: A, op1: B)
4438    where Assembler<'a>: SseAndnpdEmitter<A, B> {
4439        <Self as SseAndnpdEmitter<A, B>>::sse_andnpd(self, op0, op1);
4440    }
4441    /// `SSE_ANDPD` (ANDPD). 
4442    /// Performs a bitwise logical AND of the two, four or eight packed double precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
4443    ///
4444    ///
4445    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDPD.html).
4446    ///
4447    /// Supported operand variants:
4448    ///
4449    /// ```text
4450    /// +---+----------+
4451    /// | # | Operands |
4452    /// +---+----------+
4453    /// | 1 | Xmm, Mem |
4454    /// | 2 | Xmm, Xmm |
4455    /// +---+----------+
4456    /// ```
4457    #[inline]
4458    pub fn sse_andpd<A, B>(&mut self, op0: A, op1: B)
4459    where Assembler<'a>: SseAndpdEmitter<A, B> {
4460        <Self as SseAndpdEmitter<A, B>>::sse_andpd(self, op0, op1);
4461    }
4462    /// `SSE_CMPPD` (CMPPD). 
4463    /// Performs a SIMD compare of the packed double precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.
4464    ///
4465    ///
4466    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPD.html).
4467    ///
4468    /// Supported operand variants:
4469    ///
4470    /// ```text
4471    /// +---+---------------+
4472    /// | # | Operands      |
4473    /// +---+---------------+
4474    /// | 1 | Xmm, Mem, Imm |
4475    /// | 2 | Xmm, Xmm, Imm |
4476    /// +---+---------------+
4477    /// ```
4478    #[inline]
4479    pub fn sse_cmppd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4480    where Assembler<'a>: SseCmppdEmitter<A, B, C> {
4481        <Self as SseCmppdEmitter<A, B, C>>::sse_cmppd(self, op0, op1, op2);
4482    }
4483    /// `SSE_CMPSD` (CMPSD). 
4484    /// Compares the byte, word, doubleword, or quadword specified with the first source operand with the byte, word, doubleword, or quadword specified with the second source operand and sets the status flags in the EFLAGS register according to the results.
4485    ///
4486    ///
4487    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPS%3ACMPSB%3ACMPSW%3ACMPSD%3ACMPSQ.html).
4488    ///
4489    /// Supported operand variants:
4490    ///
4491    /// ```text
4492    /// +---+---------------+
4493    /// | # | Operands      |
4494    /// +---+---------------+
4495    /// | 1 | Xmm, Mem, Imm |
4496    /// | 2 | Xmm, Xmm, Imm |
4497    /// +---+---------------+
4498    /// ```
4499    #[inline]
4500    pub fn sse_cmpsd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
4501    where Assembler<'a>: SseCmpsdEmitter<A, B, C> {
4502        <Self as SseCmpsdEmitter<A, B, C>>::sse_cmpsd(self, op0, op1, op2);
4503    }
4504    /// `SSE_COMISD` (COMISD). 
4505    /// Compares the double precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
4506    ///
4507    ///
4508    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/COMISD.html).
4509    ///
4510    /// Supported operand variants:
4511    ///
4512    /// ```text
4513    /// +---+----------+
4514    /// | # | Operands |
4515    /// +---+----------+
4516    /// | 1 | Xmm, Mem |
4517    /// | 2 | Xmm, Xmm |
4518    /// +---+----------+
4519    /// ```
4520    #[inline]
4521    pub fn sse_comisd<A, B>(&mut self, op0: A, op1: B)
4522    where Assembler<'a>: SseComisdEmitter<A, B> {
4523        <Self as SseComisdEmitter<A, B>>::sse_comisd(self, op0, op1);
4524    }
4525    /// `SSE_CVTDQ2PD` (CVTDQ2PD). 
4526    /// Converts two, four or eight packed signed doubleword integers in the source operand (the second operand) to two, four or eight packed double precision floating-point values in the destination operand (the first operand).
4527    ///
4528    ///
4529    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTDQ2PD.html).
4530    ///
4531    /// Supported operand variants:
4532    ///
4533    /// ```text
4534    /// +---+----------+
4535    /// | # | Operands |
4536    /// +---+----------+
4537    /// | 1 | Xmm, Mem |
4538    /// | 2 | Xmm, Xmm |
4539    /// +---+----------+
4540    /// ```
4541    #[inline]
4542    pub fn sse_cvtdq2pd<A, B>(&mut self, op0: A, op1: B)
4543    where Assembler<'a>: SseCvtdq2pdEmitter<A, B> {
4544        <Self as SseCvtdq2pdEmitter<A, B>>::sse_cvtdq2pd(self, op0, op1);
4545    }
4546    /// `SSE_CVTDQ2PS` (CVTDQ2PS). 
4547    /// Converts four, eight or sixteen packed signed doubleword integers in the source operand to four, eight or sixteen packed single precision floating-point values in the destination operand.
4548    ///
4549    ///
4550    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTDQ2PS.html).
4551    ///
4552    /// Supported operand variants:
4553    ///
4554    /// ```text
4555    /// +---+----------+
4556    /// | # | Operands |
4557    /// +---+----------+
4558    /// | 1 | Xmm, Mem |
4559    /// | 2 | Xmm, Xmm |
4560    /// +---+----------+
4561    /// ```
4562    #[inline]
4563    pub fn sse_cvtdq2ps<A, B>(&mut self, op0: A, op1: B)
4564    where Assembler<'a>: SseCvtdq2psEmitter<A, B> {
4565        <Self as SseCvtdq2psEmitter<A, B>>::sse_cvtdq2ps(self, op0, op1);
4566    }
4567    /// `SSE_CVTPD2DQ` (CVTPD2DQ). 
4568    /// Converts packed double precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).
4569    ///
4570    ///
4571    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPD2DQ.html).
4572    ///
4573    /// Supported operand variants:
4574    ///
4575    /// ```text
4576    /// +---+----------+
4577    /// | # | Operands |
4578    /// +---+----------+
4579    /// | 1 | Xmm, Mem |
4580    /// | 2 | Xmm, Xmm |
4581    /// +---+----------+
4582    /// ```
4583    #[inline]
4584    pub fn sse_cvtpd2dq<A, B>(&mut self, op0: A, op1: B)
4585    where Assembler<'a>: SseCvtpd2dqEmitter<A, B> {
4586        <Self as SseCvtpd2dqEmitter<A, B>>::sse_cvtpd2dq(self, op0, op1);
4587    }
4588    /// `SSE_CVTPD2PS` (CVTPD2PS). 
4589    /// Converts two, four or eight packed double precision floating-point values in the source operand (second operand) to two, four or eight packed single precision floating-point values in the destination operand (first operand).
4590    ///
4591    ///
4592    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPD2PS.html).
4593    ///
4594    /// Supported operand variants:
4595    ///
4596    /// ```text
4597    /// +---+----------+
4598    /// | # | Operands |
4599    /// +---+----------+
4600    /// | 1 | Xmm, Mem |
4601    /// | 2 | Xmm, Xmm |
4602    /// +---+----------+
4603    /// ```
4604    #[inline]
4605    pub fn sse_cvtpd2ps<A, B>(&mut self, op0: A, op1: B)
4606    where Assembler<'a>: SseCvtpd2psEmitter<A, B> {
4607        <Self as SseCvtpd2psEmitter<A, B>>::sse_cvtpd2ps(self, op0, op1);
4608    }
4609    /// `SSE_CVTPS2DQ` (CVTPS2DQ). 
4610    /// Converts four, eight or sixteen packed single precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.
4611    ///
4612    ///
4613    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPS2DQ.html).
4614    ///
4615    /// Supported operand variants:
4616    ///
4617    /// ```text
4618    /// +---+----------+
4619    /// | # | Operands |
4620    /// +---+----------+
4621    /// | 1 | Xmm, Mem |
4622    /// | 2 | Xmm, Xmm |
4623    /// +---+----------+
4624    /// ```
4625    #[inline]
4626    pub fn sse_cvtps2dq<A, B>(&mut self, op0: A, op1: B)
4627    where Assembler<'a>: SseCvtps2dqEmitter<A, B> {
4628        <Self as SseCvtps2dqEmitter<A, B>>::sse_cvtps2dq(self, op0, op1);
4629    }
4630    /// `SSE_CVTPS2PD` (CVTPS2PD). 
4631    /// Converts two, four or eight packed single precision floating-point values in the source operand (second operand) to two, four or eight packed double precision floating-point values in the destination operand (first operand).
4632    ///
4633    ///
4634    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTPS2PD.html).
4635    ///
4636    /// Supported operand variants:
4637    ///
4638    /// ```text
4639    /// +---+----------+
4640    /// | # | Operands |
4641    /// +---+----------+
4642    /// | 1 | Xmm, Mem |
4643    /// | 2 | Xmm, Xmm |
4644    /// +---+----------+
4645    /// ```
4646    #[inline]
4647    pub fn sse_cvtps2pd<A, B>(&mut self, op0: A, op1: B)
4648    where Assembler<'a>: SseCvtps2pdEmitter<A, B> {
4649        <Self as SseCvtps2pdEmitter<A, B>>::sse_cvtps2pd(self, op0, op1);
4650    }
4651    /// `SSE_CVTSD2SI` (CVTSD2SI). 
4652    /// Converts a double precision floating-point value in the source operand (the second operand) to a signed double-word integer in the destination operand (first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double precision floating-point value is contained in the low quadword of the register.
4653    ///
4654    ///
4655    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSD2SI.html).
4656    ///
4657    /// Supported operand variants:
4658    ///
4659    /// ```text
4660    /// +---+----------+
4661    /// | # | Operands |
4662    /// +---+----------+
4663    /// | 1 | Gpd, Mem |
4664    /// | 2 | Gpd, Xmm |
4665    /// | 3 | Gpq, Mem |
4666    /// | 4 | Gpq, Xmm |
4667    /// +---+----------+
4668    /// ```
4669    #[inline]
4670    pub fn sse_cvtsd2si<A, B>(&mut self, op0: A, op1: B)
4671    where Assembler<'a>: SseCvtsd2siEmitter<A, B> {
4672        <Self as SseCvtsd2siEmitter<A, B>>::sse_cvtsd2si(self, op0, op1);
4673    }
4674    /// `SSE_CVTSD2SS` (CVTSD2SS). 
4675    /// Converts a double precision floating-point value in the “convert-from” source operand (the second operand in SSE2 version, otherwise the third operand) to a single precision floating-point value in the destination operand.
4676    ///
4677    ///
4678    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSD2SS.html).
4679    ///
4680    /// Supported operand variants:
4681    ///
4682    /// ```text
4683    /// +---+----------+
4684    /// | # | Operands |
4685    /// +---+----------+
4686    /// | 1 | Xmm, Mem |
4687    /// | 2 | Xmm, Xmm |
4688    /// +---+----------+
4689    /// ```
4690    #[inline]
4691    pub fn sse_cvtsd2ss<A, B>(&mut self, op0: A, op1: B)
4692    where Assembler<'a>: SseCvtsd2ssEmitter<A, B> {
4693        <Self as SseCvtsd2ssEmitter<A, B>>::sse_cvtsd2ss(self, op0, op1);
4694    }
4695    /// `SSE_CVTSI2SD` (CVTSI2SD). 
4696    /// Converts a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the “convert-from” source operand to a double precision floating-point value in the destination operand. The result is stored in the low quadword of the destination operand, and the high quadword left unchanged. When conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register.
4697    ///
4698    ///
4699    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SD.html).
4700    ///
4701    /// Supported operand variants:
4702    ///
4703    /// ```text
4704    /// +---+----------+
4705    /// | # | Operands |
4706    /// +---+----------+
4707    /// | 1 | Xmm, Gpd |
4708    /// | 2 | Xmm, Gpq |
4709    /// | 3 | Xmm, Mem |
4710    /// +---+----------+
4711    /// ```
4712    #[inline]
4713    pub fn sse_cvtsi2sd<A, B>(&mut self, op0: A, op1: B)
4714    where Assembler<'a>: SseCvtsi2sdEmitter<A, B> {
4715        <Self as SseCvtsi2sdEmitter<A, B>>::sse_cvtsi2sd(self, op0, op1);
4716    }
4717    /// `SSE_CVTSS2SD` (CVTSS2SD). 
4718    /// Converts a single precision floating-point value in the “convert-from” source operand to a double precision floating-point value in the destination operand. When the “convert-from” source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register. The result is stored in the low quadword of the destination operand.
4719    ///
4720    ///
4721    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SD.html).
4722    ///
4723    /// Supported operand variants:
4724    ///
4725    /// ```text
4726    /// +---+----------+
4727    /// | # | Operands |
4728    /// +---+----------+
4729    /// | 1 | Xmm, Mem |
4730    /// | 2 | Xmm, Xmm |
4731    /// +---+----------+
4732    /// ```
4733    #[inline]
4734    pub fn sse_cvtss2sd<A, B>(&mut self, op0: A, op1: B)
4735    where Assembler<'a>: SseCvtss2sdEmitter<A, B> {
4736        <Self as SseCvtss2sdEmitter<A, B>>::sse_cvtss2sd(self, op0, op1);
4737    }
4738    /// `SSE_CVTTPD2DQ` (CVTTPD2DQ). 
4739    /// Converts two, four or eight packed double precision floating-point values in the source operand (second operand) to two, four or eight packed signed doubleword integers in the destination operand (first operand).
4740    ///
4741    ///
4742    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTPD2DQ.html).
4743    ///
4744    /// Supported operand variants:
4745    ///
4746    /// ```text
4747    /// +---+----------+
4748    /// | # | Operands |
4749    /// +---+----------+
4750    /// | 1 | Xmm, Mem |
4751    /// | 2 | Xmm, Xmm |
4752    /// +---+----------+
4753    /// ```
4754    #[inline]
4755    pub fn sse_cvttpd2dq<A, B>(&mut self, op0: A, op1: B)
4756    where Assembler<'a>: SseCvttpd2dqEmitter<A, B> {
4757        <Self as SseCvttpd2dqEmitter<A, B>>::sse_cvttpd2dq(self, op0, op1);
4758    }
4759    /// `SSE_CVTTPS2DQ` (CVTTPS2DQ). 
4760    /// Converts four, eight or sixteen packed single precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.
4761    ///
4762    ///
4763    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTPS2DQ.html).
4764    ///
4765    /// Supported operand variants:
4766    ///
4767    /// ```text
4768    /// +---+----------+
4769    /// | # | Operands |
4770    /// +---+----------+
4771    /// | 1 | Xmm, Mem |
4772    /// | 2 | Xmm, Xmm |
4773    /// +---+----------+
4774    /// ```
4775    #[inline]
4776    pub fn sse_cvttps2dq<A, B>(&mut self, op0: A, op1: B)
4777    where Assembler<'a>: SseCvttps2dqEmitter<A, B> {
4778        <Self as SseCvttps2dqEmitter<A, B>>::sse_cvttps2dq(self, op0, op1);
4779    }
4780    /// `SSE_CVTTSD2SI` (CVTTSD2SI). 
4781    /// Converts a double precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the double precision floating-point value is contained in the low quadword of the register.
4782    ///
4783    ///
4784    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSD2SI.html).
4785    ///
4786    /// Supported operand variants:
4787    ///
4788    /// ```text
4789    /// +---+----------+
4790    /// | # | Operands |
4791    /// +---+----------+
4792    /// | 1 | Gpd, Mem |
4793    /// | 2 | Gpd, Xmm |
4794    /// | 3 | Gpq, Mem |
4795    /// | 4 | Gpq, Xmm |
4796    /// +---+----------+
4797    /// ```
4798    #[inline]
4799    pub fn sse_cvttsd2si<A, B>(&mut self, op0: A, op1: B)
4800    where Assembler<'a>: SseCvttsd2siEmitter<A, B> {
4801        <Self as SseCvttsd2siEmitter<A, B>>::sse_cvttsd2si(self, op0, op1);
4802    }
4803    /// `SSE_DIVPD` (DIVPD). 
4804    /// Performs a SIMD divide of the double precision floating-point values in the first source operand by the floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
4805    ///
4806    ///
4807    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPD.html).
4808    ///
4809    /// Supported operand variants:
4810    ///
4811    /// ```text
4812    /// +---+----------+
4813    /// | # | Operands |
4814    /// +---+----------+
4815    /// | 1 | Xmm, Mem |
4816    /// | 2 | Xmm, Xmm |
4817    /// +---+----------+
4818    /// ```
4819    #[inline]
4820    pub fn sse_divpd<A, B>(&mut self, op0: A, op1: B)
4821    where Assembler<'a>: SseDivpdEmitter<A, B> {
4822        <Self as SseDivpdEmitter<A, B>>::sse_divpd(self, op0, op1);
4823    }
4824    /// `SSE_DIVSD` (DIVSD). 
4825    /// Divides the low double precision floating-point value in the first source operand by the low double precision floating-point value in the second source operand, and stores the double precision floating-point result in the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source and destination are XMM registers.
4826    ///
4827    ///
4828    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSD.html).
4829    ///
4830    /// Supported operand variants:
4831    ///
4832    /// ```text
4833    /// +---+----------+
4834    /// | # | Operands |
4835    /// +---+----------+
4836    /// | 1 | Xmm, Mem |
4837    /// | 2 | Xmm, Xmm |
4838    /// +---+----------+
4839    /// ```
4840    #[inline]
4841    pub fn sse_divsd<A, B>(&mut self, op0: A, op1: B)
4842    where Assembler<'a>: SseDivsdEmitter<A, B> {
4843        <Self as SseDivsdEmitter<A, B>>::sse_divsd(self, op0, op1);
4844    }
4845    /// `SSE_MASKMOVDQU` (MASKMOVDQU). 
4846    /// Stores selected bytes from the source operand (first operand) into an 128-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are XMM registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
4847    ///
4848    ///
4849    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVDQU.html).
4850    ///
4851    /// Supported operand variants:
4852    ///
4853    /// ```text
4854    /// +---+----------+
4855    /// | # | Operands |
4856    /// +---+----------+
4857    /// | 1 | Xmm, Xmm |
4858    /// +---+----------+
4859    /// ```
4860    #[inline]
4861    pub fn sse_maskmovdqu<A, B>(&mut self, op0: A, op1: B)
4862    where Assembler<'a>: SseMaskmovdquEmitter<A, B> {
4863        <Self as SseMaskmovdquEmitter<A, B>>::sse_maskmovdqu(self, op0, op1);
4864    }
4865    /// `SSE_MAXPD` (MAXPD). 
4866    /// Performs a SIMD compare of the packed double precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
4867    ///
4868    ///
4869    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPD.html).
4870    ///
4871    /// Supported operand variants:
4872    ///
4873    /// ```text
4874    /// +---+----------+
4875    /// | # | Operands |
4876    /// +---+----------+
4877    /// | 1 | Xmm, Mem |
4878    /// | 2 | Xmm, Xmm |
4879    /// +---+----------+
4880    /// ```
4881    #[inline]
4882    pub fn sse_maxpd<A, B>(&mut self, op0: A, op1: B)
4883    where Assembler<'a>: SseMaxpdEmitter<A, B> {
4884        <Self as SseMaxpdEmitter<A, B>>::sse_maxpd(self, op0, op1);
4885    }
4886    /// `SSE_MAXSD` (MAXSD). 
4887    /// Compares the low double precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low quadword of the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source and destination operands are XMM registers. When the second source operand is a memory operand, only 64 bits are accessed.
4888    ///
4889    ///
4890    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSD.html).
4891    ///
4892    /// Supported operand variants:
4893    ///
4894    /// ```text
4895    /// +---+----------+
4896    /// | # | Operands |
4897    /// +---+----------+
4898    /// | 1 | Xmm, Mem |
4899    /// | 2 | Xmm, Xmm |
4900    /// +---+----------+
4901    /// ```
4902    #[inline]
4903    pub fn sse_maxsd<A, B>(&mut self, op0: A, op1: B)
4904    where Assembler<'a>: SseMaxsdEmitter<A, B> {
4905        <Self as SseMaxsdEmitter<A, B>>::sse_maxsd(self, op0, op1);
4906    }
4907    /// `SSE_MINPD` (MINPD). 
4908    /// Performs a SIMD compare of the packed double precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
4909    ///
4910    ///
4911    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPD.html).
4912    ///
4913    /// Supported operand variants:
4914    ///
4915    /// ```text
4916    /// +---+----------+
4917    /// | # | Operands |
4918    /// +---+----------+
4919    /// | 1 | Xmm, Mem |
4920    /// | 2 | Xmm, Xmm |
4921    /// +---+----------+
4922    /// ```
4923    #[inline]
4924    pub fn sse_minpd<A, B>(&mut self, op0: A, op1: B)
4925    where Assembler<'a>: SseMinpdEmitter<A, B> {
4926        <Self as SseMinpdEmitter<A, B>>::sse_minpd(self, op0, op1);
4927    }
4928    /// `SSE_MINSD` (MINSD). 
4929    /// Compares the low double precision floating-point values in the first source operand and the second source operand, and returns the minimum value to the low quadword of the destination operand. When the source operand is a memory operand, only the 64 bits are accessed.
4930    ///
4931    ///
4932    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSD.html).
4933    ///
4934    /// Supported operand variants:
4935    ///
4936    /// ```text
4937    /// +---+----------+
4938    /// | # | Operands |
4939    /// +---+----------+
4940    /// | 1 | Xmm, Mem |
4941    /// | 2 | Xmm, Xmm |
4942    /// +---+----------+
4943    /// ```
4944    #[inline]
4945    pub fn sse_minsd<A, B>(&mut self, op0: A, op1: B)
4946    where Assembler<'a>: SseMinsdEmitter<A, B> {
4947        <Self as SseMinsdEmitter<A, B>>::sse_minsd(self, op0, op1);
4948    }
4949    /// `SSE_MOVAPD` (MOVAPD). 
4950    /// Moves 2, 4 or 8 double precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from an 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
4951    ///
4952    ///
4953    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPD.html).
4954    ///
4955    /// Supported operand variants:
4956    ///
4957    /// ```text
4958    /// +---+----------+
4959    /// | # | Operands |
4960    /// +---+----------+
4961    /// | 1 | Mem, Xmm |
4962    /// | 2 | Xmm, Mem |
4963    /// | 3 | Xmm, Xmm |
4964    /// +---+----------+
4965    /// ```
4966    #[inline]
4967    pub fn sse_movapd<A, B>(&mut self, op0: A, op1: B)
4968    where Assembler<'a>: SseMovapdEmitter<A, B> {
4969        <Self as SseMovapdEmitter<A, B>>::sse_movapd(self, op0, op1);
4970    }
4971    /// `SSE_MOVDQA` (MOVDQA). 
4972    /// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
4973    ///
4974    ///
4975    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQA%3AVMOVDQA32%3AVMOVDQA64.html).
4976    ///
4977    /// Supported operand variants:
4978    ///
4979    /// ```text
4980    /// +---+----------+
4981    /// | # | Operands |
4982    /// +---+----------+
4983    /// | 1 | Mem, Xmm |
4984    /// | 2 | Xmm, Mem |
4985    /// | 3 | Xmm, Xmm |
4986    /// +---+----------+
4987    /// ```
4988    #[inline]
4989    pub fn sse_movdqa<A, B>(&mut self, op0: A, op1: B)
4990    where Assembler<'a>: SseMovdqaEmitter<A, B> {
4991        <Self as SseMovdqaEmitter<A, B>>::sse_movdqa(self, op0, op1);
4992    }
4993    /// `SSE_MOVDQU` (MOVDQU). 
4994    /// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b otherwise instructions will #UD.
4995    ///
4996    ///
4997    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQU%3AVMOVDQU8%3AVMOVDQU16%3AVMOVDQU32%3AVMOVDQU64.html).
4998    ///
4999    /// Supported operand variants:
5000    ///
5001    /// ```text
5002    /// +---+----------+
5003    /// | # | Operands |
5004    /// +---+----------+
5005    /// | 1 | Mem, Xmm |
5006    /// | 2 | Xmm, Mem |
5007    /// | 3 | Xmm, Xmm |
5008    /// +---+----------+
5009    /// ```
5010    #[inline]
5011    pub fn sse_movdqu<A, B>(&mut self, op0: A, op1: B)
5012    where Assembler<'a>: SseMovdquEmitter<A, B> {
5013        <Self as SseMovdquEmitter<A, B>>::sse_movdqu(self, op0, op1);
5014    }
5015    /// `SSE_MOVD_G2X` (MOVD). 
5016    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
5017    ///
5018    ///
5019    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
5020    ///
5021    /// Supported operand variants:
5022    ///
5023    /// ```text
5024    /// +---+----------+
5025    /// | # | Operands |
5026    /// +---+----------+
5027    /// | 1 | Xmm, Gpd |
5028    /// | 2 | Xmm, Mem |
5029    /// +---+----------+
5030    /// ```
5031    #[inline]
5032    pub fn sse_movd_g2x<A, B>(&mut self, op0: A, op1: B)
5033    where Assembler<'a>: SseMovdG2xEmitter<A, B> {
5034        <Self as SseMovdG2xEmitter<A, B>>::sse_movd_g2x(self, op0, op1);
5035    }
5036    /// `SSE_MOVD_X2G` (MOVD). 
5037    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
5038    ///
5039    ///
5040    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
5041    ///
5042    /// Supported operand variants:
5043    ///
5044    /// ```text
5045    /// +---+----------+
5046    /// | # | Operands |
5047    /// +---+----------+
5048    /// | 1 | Gpd, Xmm |
5049    /// | 2 | Mem, Xmm |
5050    /// +---+----------+
5051    /// ```
5052    #[inline]
5053    pub fn sse_movd_x2g<A, B>(&mut self, op0: A, op1: B)
5054    where Assembler<'a>: SseMovdX2gEmitter<A, B> {
5055        <Self as SseMovdX2gEmitter<A, B>>::sse_movd_x2g(self, op0, op1);
5056    }
5057    /// `SSE_MOVHPD` (MOVHPD). 
5058    /// This instruction cannot be used for register to register or memory to memory moves.
5059    ///
5060    ///
5061    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPD.html).
5062    ///
5063    /// Supported operand variants:
5064    ///
5065    /// ```text
5066    /// +---+----------+
5067    /// | # | Operands |
5068    /// +---+----------+
5069    /// | 1 | Mem, Xmm |
5070    /// | 2 | Xmm, Mem |
5071    /// +---+----------+
5072    /// ```
5073    #[inline]
5074    pub fn sse_movhpd<A, B>(&mut self, op0: A, op1: B)
5075    where Assembler<'a>: SseMovhpdEmitter<A, B> {
5076        <Self as SseMovhpdEmitter<A, B>>::sse_movhpd(self, op0, op1);
5077    }
5078    /// `SSE_MOVLPD` (MOVLPD). 
5079    /// This instruction cannot be used for register to register or memory to memory moves.
5080    ///
5081    ///
5082    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPD.html).
5083    ///
5084    /// Supported operand variants:
5085    ///
5086    /// ```text
5087    /// +---+----------+
5088    /// | # | Operands |
5089    /// +---+----------+
5090    /// | 1 | Mem, Xmm |
5091    /// | 2 | Xmm, Mem |
5092    /// +---+----------+
5093    /// ```
5094    #[inline]
5095    pub fn sse_movlpd<A, B>(&mut self, op0: A, op1: B)
5096    where Assembler<'a>: SseMovlpdEmitter<A, B> {
5097        <Self as SseMovlpdEmitter<A, B>>::sse_movlpd(self, op0, op1);
5098    }
5099    /// `SSE_MOVMSKPD` (MOVMSKPD). 
5100    /// Extracts the sign bits from the packed double precision floating-point values in the source operand (second operand), formats them into a 2-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM register, and the destination operand is a general-purpose register. The mask is stored in the 2 low-order bits of the destination operand. Zero-extend the upper bits of the destination.
5101    ///
5102    ///
5103    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPD.html).
5104    ///
5105    /// Supported operand variants:
5106    ///
5107    /// ```text
5108    /// +---+----------+
5109    /// | # | Operands |
5110    /// +---+----------+
5111    /// | 1 | Gpq, Xmm |
5112    /// +---+----------+
5113    /// ```
5114    #[inline]
5115    pub fn sse_movmskpd<A, B>(&mut self, op0: A, op1: B)
5116    where Assembler<'a>: SseMovmskpdEmitter<A, B> {
5117        <Self as SseMovmskpdEmitter<A, B>>::sse_movmskpd(self, op0, op1);
5118    }
5119    /// `SSE_MOVNTDQ` (MOVNTDQ). 
5120    /// Moves the packed integers in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain integer data (packed bytes, words, double-words, or quadwords). The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (512-bit version) boundary otherwise a general-protection exception (#GP) will be generated.
5121    ///
5122    ///
5123    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTDQ.html).
5124    ///
5125    /// Supported operand variants:
5126    ///
5127    /// ```text
5128    /// +---+----------+
5129    /// | # | Operands |
5130    /// +---+----------+
5131    /// | 1 | Mem, Xmm |
5132    /// +---+----------+
5133    /// ```
5134    #[inline]
5135    pub fn sse_movntdq<A, B>(&mut self, op0: A, op1: B)
5136    where Assembler<'a>: SseMovntdqEmitter<A, B> {
5137        <Self as SseMovntdqEmitter<A, B>>::sse_movntdq(self, op0, op1);
5138    }
5139    /// `SSE_MOVNTPD` (MOVNTPD). 
5140    /// Moves the packed double precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed double precision, floating-pointing data. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.
5141    ///
5142    ///
5143    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPD.html).
5144    ///
5145    /// Supported operand variants:
5146    ///
5147    /// ```text
5148    /// +---+----------+
5149    /// | # | Operands |
5150    /// +---+----------+
5151    /// | 1 | Mem, Xmm |
5152    /// +---+----------+
5153    /// ```
5154    #[inline]
5155    pub fn sse_movntpd<A, B>(&mut self, op0: A, op1: B)
5156    where Assembler<'a>: SseMovntpdEmitter<A, B> {
5157        <Self as SseMovntpdEmitter<A, B>>::sse_movntpd(self, op0, op1);
5158    }
5159    /// `SSE_MOVNTSD`.
5160    ///
5161    /// Supported operand variants:
5162    ///
5163    /// ```text
5164    /// +---+----------+
5165    /// | # | Operands |
5166    /// +---+----------+
5167    /// | 1 | Mem, Xmm |
5168    /// +---+----------+
5169    /// ```
5170    #[inline]
5171    pub fn sse_movntsd<A, B>(&mut self, op0: A, op1: B)
5172    where Assembler<'a>: SseMovntsdEmitter<A, B> {
5173        <Self as SseMovntsdEmitter<A, B>>::sse_movntsd(self, op0, op1);
5174    }
5175    /// `SSE_MOVQ` (MOVQ). 
5176    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
5177    ///
5178    ///
5179    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
5180    ///
5181    /// Supported operand variants:
5182    ///
5183    /// ```text
5184    /// +---+----------+
5185    /// | # | Operands |
5186    /// +---+----------+
5187    /// | 1 | Mem, Xmm |
5188    /// | 2 | Xmm, Mem |
5189    /// | 3 | Xmm, Xmm |
5190    /// +---+----------+
5191    /// ```
5192    #[inline]
5193    pub fn sse_movq<A, B>(&mut self, op0: A, op1: B)
5194    where Assembler<'a>: SseMovqEmitter<A, B> {
5195        <Self as SseMovqEmitter<A, B>>::sse_movq(self, op0, op1);
5196    }
5197    /// `SSE_MOVQ_G2X` (MOVQ). 
5198    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
5199    ///
5200    ///
5201    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
5202    ///
5203    /// Supported operand variants:
5204    ///
5205    /// ```text
5206    /// +---+----------+
5207    /// | # | Operands |
5208    /// +---+----------+
5209    /// | 1 | Xmm, Gpd |
5210    /// | 2 | Xmm, Mem |
5211    /// +---+----------+
5212    /// ```
5213    #[inline]
5214    pub fn sse_movq_g2x<A, B>(&mut self, op0: A, op1: B)
5215    where Assembler<'a>: SseMovqG2xEmitter<A, B> {
5216        <Self as SseMovqG2xEmitter<A, B>>::sse_movq_g2x(self, op0, op1);
5217    }
5218    /// `SSE_MOVQ_X2G` (MOVQ). 
5219    /// Copies a doubleword from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be general-purpose registers, MMX technology registers, XMM registers, or 32-bit memory locations. This instruction can be used to move a doubleword to and from the low doubleword of an MMX technology register and a general-purpose register or a 32-bit memory location, or to and from the low doubleword of an XMM register and a general-purpose register or a 32-bit memory location. The instruction cannot be used to transfer data between MMX technology registers, between XMM registers, between general-purpose registers, or between memory locations.
5220    ///
5221    ///
5222    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVD%3AMOVQ.html).
5223    ///
5224    /// Supported operand variants:
5225    ///
5226    /// ```text
5227    /// +---+----------+
5228    /// | # | Operands |
5229    /// +---+----------+
5230    /// | 1 | Gpd, Xmm |
5231    /// | 2 | Mem, Xmm |
5232    /// +---+----------+
5233    /// ```
5234    #[inline]
5235    pub fn sse_movq_x2g<A, B>(&mut self, op0: A, op1: B)
5236    where Assembler<'a>: SseMovqX2gEmitter<A, B> {
5237        <Self as SseMovqX2gEmitter<A, B>>::sse_movq_x2g(self, op0, op1);
5238    }
5239    /// `SSE_MOVSD` (MOVSD). 
5240    /// Moves the byte, word, or doubleword specified with the second operand (source operand) to the location specified with the first operand (destination operand). Both the source and destination operands are located in memory. The address of the source operand is read from the DS:ESI or the DS:SI registers (depending on the address-size attribute of the instruction, 32 or 16, respectively). The address of the destination operand is read from the ES:EDI or the ES:DI registers (again depending on the address-size attribute of the instruction). The DS segment may be overridden with a segment override prefix, but the ES segment cannot be overridden.
5241    ///
5242    ///
5243    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVS%3AMOVSB%3AMOVSW%3AMOVSD%3AMOVSQ.html).
5244    ///
5245    /// Supported operand variants:
5246    ///
5247    /// ```text
5248    /// +---+----------+
5249    /// | # | Operands |
5250    /// +---+----------+
5251    /// | 1 | Mem, Xmm |
5252    /// | 2 | Xmm, Mem |
5253    /// | 3 | Xmm, Xmm |
5254    /// +---+----------+
5255    /// ```
5256    #[inline]
5257    pub fn sse_movsd<A, B>(&mut self, op0: A, op1: B)
5258    where Assembler<'a>: SseMovsdEmitter<A, B> {
5259        <Self as SseMovsdEmitter<A, B>>::sse_movsd(self, op0, op1);
5260    }
5261    /// `SSE_MOVUPD` (MOVUPD). 
5262    /// Note: VEX.vvvv and EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
5263    ///
5264    ///
5265    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPD.html).
5266    ///
5267    /// Supported operand variants:
5268    ///
5269    /// ```text
5270    /// +---+----------+
5271    /// | # | Operands |
5272    /// +---+----------+
5273    /// | 1 | Mem, Xmm |
5274    /// | 2 | Xmm, Mem |
5275    /// | 3 | Xmm, Xmm |
5276    /// +---+----------+
5277    /// ```
5278    #[inline]
5279    pub fn sse_movupd<A, B>(&mut self, op0: A, op1: B)
5280    where Assembler<'a>: SseMovupdEmitter<A, B> {
5281        <Self as SseMovupdEmitter<A, B>>::sse_movupd(self, op0, op1);
5282    }
5283    /// `SSE_MULPD` (MULPD). 
5284    /// Multiply packed double precision floating-point values from the first source operand with corresponding values in the second source operand, and stores the packed double precision floating-point results in the destination operand.
5285    ///
5286    ///
5287    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPD.html).
5288    ///
5289    /// Supported operand variants:
5290    ///
5291    /// ```text
5292    /// +---+----------+
5293    /// | # | Operands |
5294    /// +---+----------+
5295    /// | 1 | Xmm, Mem |
5296    /// | 2 | Xmm, Xmm |
5297    /// +---+----------+
5298    /// ```
5299    #[inline]
5300    pub fn sse_mulpd<A, B>(&mut self, op0: A, op1: B)
5301    where Assembler<'a>: SseMulpdEmitter<A, B> {
5302        <Self as SseMulpdEmitter<A, B>>::sse_mulpd(self, op0, op1);
5303    }
5304    /// `SSE_MULSD` (MULSD). 
5305    /// Multiplies the low double precision floating-point value in the second source operand by the low double precision floating-point value in the first source operand, and stores the double precision floating-point result in the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source operand and the destination operands are XMM registers.
5306    ///
5307    ///
5308    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSD.html).
5309    ///
5310    /// Supported operand variants:
5311    ///
5312    /// ```text
5313    /// +---+----------+
5314    /// | # | Operands |
5315    /// +---+----------+
5316    /// | 1 | Xmm, Mem |
5317    /// | 2 | Xmm, Xmm |
5318    /// +---+----------+
5319    /// ```
5320    #[inline]
5321    pub fn sse_mulsd<A, B>(&mut self, op0: A, op1: B)
5322    where Assembler<'a>: SseMulsdEmitter<A, B> {
5323        <Self as SseMulsdEmitter<A, B>>::sse_mulsd(self, op0, op1);
5324    }
5325    /// `SSE_ORPD` (ORPD). 
5326    /// Performs a bitwise logical OR of the two, four or eight packed double precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
5327    ///
5328    ///
5329    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPD.html).
5330    ///
5331    /// Supported operand variants:
5332    ///
5333    /// ```text
5334    /// +---+----------+
5335    /// | # | Operands |
5336    /// +---+----------+
5337    /// | 1 | Xmm, Mem |
5338    /// | 2 | Xmm, Xmm |
5339    /// +---+----------+
5340    /// ```
5341    #[inline]
5342    pub fn sse_orpd<A, B>(&mut self, op0: A, op1: B)
5343    where Assembler<'a>: SseOrpdEmitter<A, B> {
5344        <Self as SseOrpdEmitter<A, B>>::sse_orpd(self, op0, op1);
5345    }
5346    /// `SSE_PACKSSDW` (PACKSSDW). 
5347    /// Converts packed signed word integers into packed signed byte integers (PACKSSWB) or converts packed signed doubleword integers into packed signed word integers (PACKSSDW), using saturation to handle overflow conditions. See Figure 4-6 for an example of the packing operation.
5348    ///
5349    ///
5350    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKSSWB%3APACKSSDW.html).
5351    ///
5352    /// Supported operand variants:
5353    ///
5354    /// ```text
5355    /// +---+----------+
5356    /// | # | Operands |
5357    /// +---+----------+
5358    /// | 1 | Xmm, Mem |
5359    /// | 2 | Xmm, Xmm |
5360    /// +---+----------+
5361    /// ```
5362    #[inline]
5363    pub fn sse_packssdw<A, B>(&mut self, op0: A, op1: B)
5364    where Assembler<'a>: SsePackssdwEmitter<A, B> {
5365        <Self as SsePackssdwEmitter<A, B>>::sse_packssdw(self, op0, op1);
5366    }
5367    /// `SSE_PACKSSWB` (PACKSSWB). 
5368    /// Converts packed signed word integers into packed signed byte integers (PACKSSWB) or converts packed signed doubleword integers into packed signed word integers (PACKSSDW), using saturation to handle overflow conditions. See Figure 4-6 for an example of the packing operation.
5369    ///
5370    ///
5371    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKSSWB%3APACKSSDW.html).
5372    ///
5373    /// Supported operand variants:
5374    ///
5375    /// ```text
5376    /// +---+----------+
5377    /// | # | Operands |
5378    /// +---+----------+
5379    /// | 1 | Xmm, Mem |
5380    /// | 2 | Xmm, Xmm |
5381    /// +---+----------+
5382    /// ```
5383    #[inline]
5384    pub fn sse_packsswb<A, B>(&mut self, op0: A, op1: B)
5385    where Assembler<'a>: SsePacksswbEmitter<A, B> {
5386        <Self as SsePacksswbEmitter<A, B>>::sse_packsswb(self, op0, op1);
5387    }
5388    /// `SSE_PACKUSWB` (PACKUSWB). 
5389    /// Converts 4, 8, 16, or 32 signed word integers from the destination operand (first operand) and 4, 8, 16, or 32 signed word integers from the source operand (second operand) into 8, 16, 32 or 64 unsigned byte integers and stores the result in the destination operand. (See Figure 4-6 for an example of the packing operation.) If a signed word integer value is beyond the range of an unsigned byte integer (that is, greater than FFH or less than 00H), the saturated unsigned byte integer value of FFH or 00H, respectively, is stored in the destination.
5390    ///
5391    ///
5392    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PACKUSWB.html).
5393    ///
5394    /// Supported operand variants:
5395    ///
5396    /// ```text
5397    /// +---+----------+
5398    /// | # | Operands |
5399    /// +---+----------+
5400    /// | 1 | Xmm, Mem |
5401    /// | 2 | Xmm, Xmm |
5402    /// +---+----------+
5403    /// ```
5404    #[inline]
5405    pub fn sse_packuswb<A, B>(&mut self, op0: A, op1: B)
5406    where Assembler<'a>: SsePackuswbEmitter<A, B> {
5407        <Self as SsePackuswbEmitter<A, B>>::sse_packuswb(self, op0, op1);
5408    }
5409    /// `SSE_PADDB` (PADDB). 
5410    /// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
5411    ///
5412    ///
5413    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
5414    ///
5415    /// Supported operand variants:
5416    ///
5417    /// ```text
5418    /// +---+----------+
5419    /// | # | Operands |
5420    /// +---+----------+
5421    /// | 1 | Xmm, Mem |
5422    /// | 2 | Xmm, Xmm |
5423    /// +---+----------+
5424    /// ```
5425    #[inline]
5426    pub fn sse_paddb<A, B>(&mut self, op0: A, op1: B)
5427    where Assembler<'a>: SsePaddbEmitter<A, B> {
5428        <Self as SsePaddbEmitter<A, B>>::sse_paddb(self, op0, op1);
5429    }
5430    /// `SSE_PADDD` (PADDD). 
5431    /// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
5432    ///
5433    ///
5434    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
5435    ///
5436    /// Supported operand variants:
5437    ///
5438    /// ```text
5439    /// +---+----------+
5440    /// | # | Operands |
5441    /// +---+----------+
5442    /// | 1 | Xmm, Mem |
5443    /// | 2 | Xmm, Xmm |
5444    /// +---+----------+
5445    /// ```
5446    #[inline]
5447    pub fn sse_paddd<A, B>(&mut self, op0: A, op1: B)
5448    where Assembler<'a>: SsePadddEmitter<A, B> {
5449        <Self as SsePadddEmitter<A, B>>::sse_paddd(self, op0, op1);
5450    }
5451    /// `SSE_PADDQ` (PADDQ). 
5452    /// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
5453    ///
5454    ///
5455    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
5456    ///
5457    /// Supported operand variants:
5458    ///
5459    /// ```text
5460    /// +---+----------+
5461    /// | # | Operands |
5462    /// +---+----------+
5463    /// | 1 | Xmm, Mem |
5464    /// | 2 | Xmm, Xmm |
5465    /// +---+----------+
5466    /// ```
5467    #[inline]
5468    pub fn sse_paddq<A, B>(&mut self, op0: A, op1: B)
5469    where Assembler<'a>: SsePaddqEmitter<A, B> {
5470        <Self as SsePaddqEmitter<A, B>>::sse_paddq(self, op0, op1);
5471    }
5472    /// `SSE_PADDSB` (PADDSB). 
5473    /// Performs a SIMD add of the packed signed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with signed saturation, as described in the following paragraphs.
5474    ///
5475    ///
5476    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDSB%3APADDSW.html).
5477    ///
5478    /// Supported operand variants:
5479    ///
5480    /// ```text
5481    /// +---+----------+
5482    /// | # | Operands |
5483    /// +---+----------+
5484    /// | 1 | Xmm, Mem |
5485    /// | 2 | Xmm, Xmm |
5486    /// +---+----------+
5487    /// ```
5488    #[inline]
5489    pub fn sse_paddsb<A, B>(&mut self, op0: A, op1: B)
5490    where Assembler<'a>: SsePaddsbEmitter<A, B> {
5491        <Self as SsePaddsbEmitter<A, B>>::sse_paddsb(self, op0, op1);
5492    }
5493    /// `SSE_PADDSW` (PADDSW). 
5494    /// Performs a SIMD add of the packed signed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with signed saturation, as described in the following paragraphs.
5495    ///
5496    ///
5497    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDSB%3APADDSW.html).
5498    ///
5499    /// Supported operand variants:
5500    ///
5501    /// ```text
5502    /// +---+----------+
5503    /// | # | Operands |
5504    /// +---+----------+
5505    /// | 1 | Xmm, Mem |
5506    /// | 2 | Xmm, Xmm |
5507    /// +---+----------+
5508    /// ```
5509    #[inline]
5510    pub fn sse_paddsw<A, B>(&mut self, op0: A, op1: B)
5511    where Assembler<'a>: SsePaddswEmitter<A, B> {
5512        <Self as SsePaddswEmitter<A, B>>::sse_paddsw(self, op0, op1);
5513    }
5514    /// `SSE_PADDUSB` (PADDUSB). 
5515    /// Performs a SIMD add of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with unsigned saturation, as described in the following paragraphs.
5516    ///
5517    ///
5518    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDUSB%3APADDUSW.html).
5519    ///
5520    /// Supported operand variants:
5521    ///
5522    /// ```text
5523    /// +---+----------+
5524    /// | # | Operands |
5525    /// +---+----------+
5526    /// | 1 | Xmm, Mem |
5527    /// | 2 | Xmm, Xmm |
5528    /// +---+----------+
5529    /// ```
5530    #[inline]
5531    pub fn sse_paddusb<A, B>(&mut self, op0: A, op1: B)
5532    where Assembler<'a>: SsePaddusbEmitter<A, B> {
5533        <Self as SsePaddusbEmitter<A, B>>::sse_paddusb(self, op0, op1);
5534    }
5535    /// `SSE_PADDUSW` (PADDUSW). 
5536    /// Performs a SIMD add of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with unsigned saturation, as described in the following paragraphs.
5537    ///
5538    ///
5539    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDUSB%3APADDUSW.html).
5540    ///
5541    /// Supported operand variants:
5542    ///
5543    /// ```text
5544    /// +---+----------+
5545    /// | # | Operands |
5546    /// +---+----------+
5547    /// | 1 | Xmm, Mem |
5548    /// | 2 | Xmm, Xmm |
5549    /// +---+----------+
5550    /// ```
5551    #[inline]
5552    pub fn sse_paddusw<A, B>(&mut self, op0: A, op1: B)
5553    where Assembler<'a>: SsePadduswEmitter<A, B> {
5554        <Self as SsePadduswEmitter<A, B>>::sse_paddusw(self, op0, op1);
5555    }
5556    /// `SSE_PADDW` (PADDW). 
5557    /// Performs a SIMD add of the packed integers from the source operand (second operand) and the destination operand (first operand), and stores the packed integer results in the destination operand. See Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD operation. Overflow is handled with wraparound, as described in the following paragraphs.
5558    ///
5559    ///
5560    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PADDB%3APADDW%3APADDD%3APADDQ.html).
5561    ///
5562    /// Supported operand variants:
5563    ///
5564    /// ```text
5565    /// +---+----------+
5566    /// | # | Operands |
5567    /// +---+----------+
5568    /// | 1 | Xmm, Mem |
5569    /// | 2 | Xmm, Xmm |
5570    /// +---+----------+
5571    /// ```
5572    #[inline]
5573    pub fn sse_paddw<A, B>(&mut self, op0: A, op1: B)
5574    where Assembler<'a>: SsePaddwEmitter<A, B> {
5575        <Self as SsePaddwEmitter<A, B>>::sse_paddw(self, op0, op1);
5576    }
5577    /// `SSE_PAND` (PAND). 
5578    /// Performs a bitwise logical AND operation on the first source operand and second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bits of the first and second operands are 1, otherwise it is set to 0.
5579    ///
5580    ///
5581    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAND.html).
5582    ///
5583    /// Supported operand variants:
5584    ///
5585    /// ```text
5586    /// +---+----------+
5587    /// | # | Operands |
5588    /// +---+----------+
5589    /// | 1 | Xmm, Mem |
5590    /// | 2 | Xmm, Xmm |
5591    /// +---+----------+
5592    /// ```
5593    #[inline]
5594    pub fn sse_pand<A, B>(&mut self, op0: A, op1: B)
5595    where Assembler<'a>: SsePandEmitter<A, B> {
5596        <Self as SsePandEmitter<A, B>>::sse_pand(self, op0, op1);
5597    }
5598    /// `SSE_PANDN` (PANDN). 
5599    /// Performs a bitwise logical NOT operation on the first source operand, then performs bitwise AND with second source operand and stores the result in the destination operand. Each bit of the result is set to 1 if the corresponding bit in the first operand is 0 and the corresponding bit in the second operand is 1, otherwise it is set to 0.
5600    ///
5601    ///
5602    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PANDN.html).
5603    ///
5604    /// Supported operand variants:
5605    ///
5606    /// ```text
5607    /// +---+----------+
5608    /// | # | Operands |
5609    /// +---+----------+
5610    /// | 1 | Xmm, Mem |
5611    /// | 2 | Xmm, Xmm |
5612    /// +---+----------+
5613    /// ```
5614    #[inline]
5615    pub fn sse_pandn<A, B>(&mut self, op0: A, op1: B)
5616    where Assembler<'a>: SsePandnEmitter<A, B> {
5617        <Self as SsePandnEmitter<A, B>>::sse_pandn(self, op0, op1);
5618    }
5619    /// `SSE_PAVGB` (PAVGB). 
5620    /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
5621    ///
5622    ///
5623    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
5624    ///
5625    /// Supported operand variants:
5626    ///
5627    /// ```text
5628    /// +---+----------+
5629    /// | # | Operands |
5630    /// +---+----------+
5631    /// | 1 | Xmm, Mem |
5632    /// | 2 | Xmm, Xmm |
5633    /// +---+----------+
5634    /// ```
5635    #[inline]
5636    pub fn sse_pavgb<A, B>(&mut self, op0: A, op1: B)
5637    where Assembler<'a>: SsePavgbEmitter<A, B> {
5638        <Self as SsePavgbEmitter<A, B>>::sse_pavgb(self, op0, op1);
5639    }
5640    /// `SSE_PAVGW` (PAVGW). 
5641    /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
5642    ///
5643    ///
5644    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
5645    ///
5646    /// Supported operand variants:
5647    ///
5648    /// ```text
5649    /// +---+----------+
5650    /// | # | Operands |
5651    /// +---+----------+
5652    /// | 1 | Xmm, Mem |
5653    /// | 2 | Xmm, Xmm |
5654    /// +---+----------+
5655    /// ```
5656    #[inline]
5657    pub fn sse_pavgw<A, B>(&mut self, op0: A, op1: B)
5658    where Assembler<'a>: SsePavgwEmitter<A, B> {
5659        <Self as SsePavgwEmitter<A, B>>::sse_pavgw(self, op0, op1);
5660    }
5661    /// `SSE_PCMPEQB` (PCMPEQB). 
5662    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5663    ///
5664    ///
5665    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
5666    ///
5667    /// Supported operand variants:
5668    ///
5669    /// ```text
5670    /// +---+----------+
5671    /// | # | Operands |
5672    /// +---+----------+
5673    /// | 1 | Xmm, Mem |
5674    /// | 2 | Xmm, Xmm |
5675    /// +---+----------+
5676    /// ```
5677    #[inline]
5678    pub fn sse_pcmpeqb<A, B>(&mut self, op0: A, op1: B)
5679    where Assembler<'a>: SsePcmpeqbEmitter<A, B> {
5680        <Self as SsePcmpeqbEmitter<A, B>>::sse_pcmpeqb(self, op0, op1);
5681    }
5682    /// `SSE_PCMPEQD` (PCMPEQD). 
5683    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5684    ///
5685    ///
5686    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
5687    ///
5688    /// Supported operand variants:
5689    ///
5690    /// ```text
5691    /// +---+----------+
5692    /// | # | Operands |
5693    /// +---+----------+
5694    /// | 1 | Xmm, Mem |
5695    /// | 2 | Xmm, Xmm |
5696    /// +---+----------+
5697    /// ```
5698    #[inline]
5699    pub fn sse_pcmpeqd<A, B>(&mut self, op0: A, op1: B)
5700    where Assembler<'a>: SsePcmpeqdEmitter<A, B> {
5701        <Self as SsePcmpeqdEmitter<A, B>>::sse_pcmpeqd(self, op0, op1);
5702    }
5703    /// `SSE_PCMPEQW` (PCMPEQW). 
5704    /// Performs a SIMD compare for equality of the packed bytes, words, or doublewords in the destination operand (first operand) and the source operand (second operand). If a pair of data elements is equal, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5705    ///
5706    ///
5707    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPEQB%3APCMPEQW%3APCMPEQD.html).
5708    ///
5709    /// Supported operand variants:
5710    ///
5711    /// ```text
5712    /// +---+----------+
5713    /// | # | Operands |
5714    /// +---+----------+
5715    /// | 1 | Xmm, Mem |
5716    /// | 2 | Xmm, Xmm |
5717    /// +---+----------+
5718    /// ```
5719    #[inline]
5720    pub fn sse_pcmpeqw<A, B>(&mut self, op0: A, op1: B)
5721    where Assembler<'a>: SsePcmpeqwEmitter<A, B> {
5722        <Self as SsePcmpeqwEmitter<A, B>>::sse_pcmpeqw(self, op0, op1);
5723    }
5724    /// `SSE_PCMPGTB` (PCMPGTB). 
5725    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5726    ///
5727    ///
5728    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
5729    ///
5730    /// Supported operand variants:
5731    ///
5732    /// ```text
5733    /// +---+----------+
5734    /// | # | Operands |
5735    /// +---+----------+
5736    /// | 1 | Xmm, Mem |
5737    /// | 2 | Xmm, Xmm |
5738    /// +---+----------+
5739    /// ```
5740    #[inline]
5741    pub fn sse_pcmpgtb<A, B>(&mut self, op0: A, op1: B)
5742    where Assembler<'a>: SsePcmpgtbEmitter<A, B> {
5743        <Self as SsePcmpgtbEmitter<A, B>>::sse_pcmpgtb(self, op0, op1);
5744    }
5745    /// `SSE_PCMPGTD` (PCMPGTD). 
5746    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5747    ///
5748    ///
5749    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
5750    ///
5751    /// Supported operand variants:
5752    ///
5753    /// ```text
5754    /// +---+----------+
5755    /// | # | Operands |
5756    /// +---+----------+
5757    /// | 1 | Xmm, Mem |
5758    /// | 2 | Xmm, Xmm |
5759    /// +---+----------+
5760    /// ```
5761    #[inline]
5762    pub fn sse_pcmpgtd<A, B>(&mut self, op0: A, op1: B)
5763    where Assembler<'a>: SsePcmpgtdEmitter<A, B> {
5764        <Self as SsePcmpgtdEmitter<A, B>>::sse_pcmpgtd(self, op0, op1);
5765    }
5766    /// `SSE_PCMPGTW` (PCMPGTW). 
5767    /// Performs an SIMD signed compare for the greater value of the packed byte, word, or doubleword integers in the destination operand (first operand) and the source operand (second operand). If a data element in the destination operand is greater than the corresponding date element in the source operand, the corresponding data element in the destination operand is set to all 1s; otherwise, it is set to all 0s.
5768    ///
5769    ///
5770    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PCMPGTB%3APCMPGTW%3APCMPGTD.html).
5771    ///
5772    /// Supported operand variants:
5773    ///
5774    /// ```text
5775    /// +---+----------+
5776    /// | # | Operands |
5777    /// +---+----------+
5778    /// | 1 | Xmm, Mem |
5779    /// | 2 | Xmm, Xmm |
5780    /// +---+----------+
5781    /// ```
5782    #[inline]
5783    pub fn sse_pcmpgtw<A, B>(&mut self, op0: A, op1: B)
5784    where Assembler<'a>: SsePcmpgtwEmitter<A, B> {
5785        <Self as SsePcmpgtwEmitter<A, B>>::sse_pcmpgtw(self, op0, op1);
5786    }
5787    /// `SSE_PEXTRW` (PEXTRW). 
5788    /// Copies the word in the source operand (second operand) specified by the count operand (third operand) to the destination operand (first operand). The source operand can be an MMX technology register or an XMM register. The destination operand can be the low word of a general-purpose register or a 16-bit memory address. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location. The content of the destination register above bit 16 is cleared (set to all 0s).
5789    ///
5790    ///
5791    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
5792    ///
5793    /// Supported operand variants:
5794    ///
5795    /// ```text
5796    /// +---+---------------+
5797    /// | # | Operands      |
5798    /// +---+---------------+
5799    /// | 1 | Gpd, Xmm, Imm |
5800    /// | 2 | Mem, Xmm, Imm |
5801    /// +---+---------------+
5802    /// ```
5803    #[inline]
5804    pub fn sse_pextrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5805    where Assembler<'a>: SsePextrwEmitter<A, B, C> {
5806        <Self as SsePextrwEmitter<A, B, C>>::sse_pextrw(self, op0, op1, op2);
5807    }
5808    /// `SSE_PINSRW` (PINSRW). 
5809    /// Three operand MMX and SSE instructions
5810    ///
5811    ///
5812    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
5813    ///
5814    /// Supported operand variants:
5815    ///
5816    /// ```text
5817    /// +---+---------------+
5818    /// | # | Operands      |
5819    /// +---+---------------+
5820    /// | 1 | Xmm, Gpd, Imm |
5821    /// | 2 | Xmm, Mem, Imm |
5822    /// +---+---------------+
5823    /// ```
5824    #[inline]
5825    pub fn sse_pinsrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
5826    where Assembler<'a>: SsePinsrwEmitter<A, B, C> {
5827        <Self as SsePinsrwEmitter<A, B, C>>::sse_pinsrw(self, op0, op1, op2);
5828    }
5829    /// `SSE_PMADDWD` (PMADDWD). 
5830    /// Multiplies the individual signed words of the destination operand (first operand) by the corresponding signed words of the source operand (second operand), producing temporary signed, doubleword results. The adjacent double-word results are then summed and stored in the destination operand. For example, the corresponding low-order words (15-0) and (31-16) in the source and destination operands are multiplied by one another and the double-word results are added together and stored in the low doubleword of the destination register (31-0). The same operation is performed on the other pairs of adjacent words. (Figure 4-11 shows this operation when using 64-bit operands).
5831    ///
5832    ///
5833    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMADDWD.html).
5834    ///
5835    /// Supported operand variants:
5836    ///
5837    /// ```text
5838    /// +---+----------+
5839    /// | # | Operands |
5840    /// +---+----------+
5841    /// | 1 | Xmm, Mem |
5842    /// | 2 | Xmm, Xmm |
5843    /// +---+----------+
5844    /// ```
5845    #[inline]
5846    pub fn sse_pmaddwd<A, B>(&mut self, op0: A, op1: B)
5847    where Assembler<'a>: SsePmaddwdEmitter<A, B> {
5848        <Self as SsePmaddwdEmitter<A, B>>::sse_pmaddwd(self, op0, op1);
5849    }
5850    /// `SSE_PMAXSW` (PMAXSW). 
5851    /// Performs a SIMD compare of the packed signed byte, word, dword or qword integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
5852    ///
5853    ///
5854    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
5855    ///
5856    /// Supported operand variants:
5857    ///
5858    /// ```text
5859    /// +---+----------+
5860    /// | # | Operands |
5861    /// +---+----------+
5862    /// | 1 | Xmm, Mem |
5863    /// | 2 | Xmm, Xmm |
5864    /// +---+----------+
5865    /// ```
5866    #[inline]
5867    pub fn sse_pmaxsw<A, B>(&mut self, op0: A, op1: B)
5868    where Assembler<'a>: SsePmaxswEmitter<A, B> {
5869        <Self as SsePmaxswEmitter<A, B>>::sse_pmaxsw(self, op0, op1);
5870    }
5871    /// `SSE_PMAXUB` (PMAXUB). 
5872    /// Performs a SIMD compare of the packed unsigned byte, word integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
5873    ///
5874    ///
5875    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
5876    ///
5877    /// Supported operand variants:
5878    ///
5879    /// ```text
5880    /// +---+----------+
5881    /// | # | Operands |
5882    /// +---+----------+
5883    /// | 1 | Xmm, Mem |
5884    /// | 2 | Xmm, Xmm |
5885    /// +---+----------+
5886    /// ```
5887    #[inline]
5888    pub fn sse_pmaxub<A, B>(&mut self, op0: A, op1: B)
5889    where Assembler<'a>: SsePmaxubEmitter<A, B> {
5890        <Self as SsePmaxubEmitter<A, B>>::sse_pmaxub(self, op0, op1);
5891    }
5892    /// `SSE_PMINSW` (PMINSW). 
5893    /// Performs a SIMD compare of the packed signed byte, word, or dword integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
5894    ///
5895    ///
5896    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
5897    ///
5898    /// Supported operand variants:
5899    ///
5900    /// ```text
5901    /// +---+----------+
5902    /// | # | Operands |
5903    /// +---+----------+
5904    /// | 1 | Xmm, Mem |
5905    /// | 2 | Xmm, Xmm |
5906    /// +---+----------+
5907    /// ```
5908    #[inline]
5909    pub fn sse_pminsw<A, B>(&mut self, op0: A, op1: B)
5910    where Assembler<'a>: SsePminswEmitter<A, B> {
5911        <Self as SsePminswEmitter<A, B>>::sse_pminsw(self, op0, op1);
5912    }
5913    /// `SSE_PMINUB` (PMINUB). 
5914    /// Performs a SIMD compare of the packed unsigned byte or word integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
5915    ///
5916    ///
5917    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
5918    ///
5919    /// Supported operand variants:
5920    ///
5921    /// ```text
5922    /// +---+----------+
5923    /// | # | Operands |
5924    /// +---+----------+
5925    /// | 1 | Xmm, Mem |
5926    /// | 2 | Xmm, Xmm |
5927    /// +---+----------+
5928    /// ```
5929    #[inline]
5930    pub fn sse_pminub<A, B>(&mut self, op0: A, op1: B)
5931    where Assembler<'a>: SsePminubEmitter<A, B> {
5932        <Self as SsePminubEmitter<A, B>>::sse_pminub(self, op0, op1);
5933    }
5934    /// `SSE_PMOVMSKB` (PMOVMSKB). 
5935    /// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
5936    ///
5937    ///
5938    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
5939    ///
5940    /// Supported operand variants:
5941    ///
5942    /// ```text
5943    /// +---+----------+
5944    /// | # | Operands |
5945    /// +---+----------+
5946    /// | 1 | Gpq, Xmm |
5947    /// +---+----------+
5948    /// ```
5949    #[inline]
5950    pub fn sse_pmovmskb<A, B>(&mut self, op0: A, op1: B)
5951    where Assembler<'a>: SsePmovmskbEmitter<A, B> {
5952        <Self as SsePmovmskbEmitter<A, B>>::sse_pmovmskb(self, op0, op1);
5953    }
5954    /// `SSE_PMULHUW` (PMULHUW). 
5955    /// Performs a SIMD unsigned multiply of the packed unsigned word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each 32-bit intermediate results in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
5956    ///
5957    ///
5958    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
5959    ///
5960    /// Supported operand variants:
5961    ///
5962    /// ```text
5963    /// +---+----------+
5964    /// | # | Operands |
5965    /// +---+----------+
5966    /// | 1 | Xmm, Mem |
5967    /// | 2 | Xmm, Xmm |
5968    /// +---+----------+
5969    /// ```
5970    #[inline]
5971    pub fn sse_pmulhuw<A, B>(&mut self, op0: A, op1: B)
5972    where Assembler<'a>: SsePmulhuwEmitter<A, B> {
5973        <Self as SsePmulhuwEmitter<A, B>>::sse_pmulhuw(self, op0, op1);
5974    }
5975    /// `SSE_PMULHW` (PMULHW): signed packed-word multiply that keeps the high 16 bits of each product.
5976    ///
5977    /// A SIMD signed multiply is carried out between the packed signed word integers of the destination operand (first operand) and those of the source operand (second operand); the high 16 bits of every intermediate 32-bit result are written to the destination operand. (See Figure 4-12 for this operation on 64-bit operands.)
5978    ///
5979    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMULHW.html).
5980    ///
5981    /// `A` and `B` must satisfy `Assembler<'a>: SsePmulhwEmitter<A, B>`; supported operand variants:
5982    ///
5983    /// ```text
5984    /// +---+----------+
5985    /// | # | Operands |
5986    /// +---+----------+
5987    /// | 1 | Xmm, Mem |
5988    /// | 2 | Xmm, Xmm |
5989    /// +---+----------+
5990    /// ```
5991    #[inline]
5992    pub fn sse_pmulhw<A, B>(&mut self, op0: A, op1: B)
5993    where Assembler<'a>: SsePmulhwEmitter<A, B> {
5994        SsePmulhwEmitter::<A, B>::sse_pmulhw(self, op0, op1);
5995    }
5996    /// `SSE_PMULLW` (PMULLW): signed packed-word multiply that keeps the low 16 bits of each product.
5997    ///
5998    /// A SIMD signed multiply is carried out between the packed signed word integers of the destination operand (first operand) and those of the source operand (second operand); the low 16 bits of every intermediate 32-bit result are written to the destination operand. (See Figure 4-12 for this operation on 64-bit operands.)
5999    ///
6000    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMULLW.html).
6001    ///
6002    /// `A` and `B` must satisfy `Assembler<'a>: SsePmullwEmitter<A, B>`; supported operand variants:
6003    ///
6004    /// ```text
6005    /// +---+----------+
6006    /// | # | Operands |
6007    /// +---+----------+
6008    /// | 1 | Xmm, Mem |
6009    /// | 2 | Xmm, Xmm |
6010    /// +---+----------+
6011    /// ```
6012    #[inline]
6013    pub fn sse_pmullw<A, B>(&mut self, op0: A, op1: B)
6014    where Assembler<'a>: SsePmullwEmitter<A, B> {
6015        SsePmullwEmitter::<A, B>::sse_pmullw(self, op0, op1);
6016    }
6017    /// `SSE_PMULUDQ` (PMULUDQ): multiplies the destination operand by the source operand.
6018    ///
6019    /// The first operand (destination operand) is multiplied by the second operand (source operand), and the result is stored back into the destination operand.
6020    ///
6021    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMULUDQ.html).
6022    ///
6023    /// `A` and `B` must satisfy `Assembler<'a>: SsePmuludqEmitter<A, B>`; supported operand variants:
6024    ///
6025    /// ```text
6026    /// +---+----------+
6027    /// | # | Operands |
6028    /// +---+----------+
6029    /// | 1 | Xmm, Mem |
6030    /// | 2 | Xmm, Xmm |
6031    /// +---+----------+
6032    /// ```
6033    #[inline]
6034    pub fn sse_pmuludq<A, B>(&mut self, op0: A, op1: B)
6035    where Assembler<'a>: SsePmuludqEmitter<A, B> {
6036        SsePmuludqEmitter::<A, B>::sse_pmuludq(self, op0, op1);
6037    }
6038    /// `SSE_POR` (POR): bitwise logical OR of the source operand into the destination operand.
6039    ///
6040    /// Computes the bitwise logical OR of the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. A result bit is 1 when the corresponding bit of either operand, or of both, is 1; otherwise it is 0.
6041    ///
6042    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/POR.html).
6043    ///
6044    /// `A` and `B` must satisfy `Assembler<'a>: SsePorEmitter<A, B>`; supported operand variants:
6045    ///
6046    /// ```text
6047    /// +---+----------+
6048    /// | # | Operands |
6049    /// +---+----------+
6050    /// | 1 | Xmm, Mem |
6051    /// | 2 | Xmm, Xmm |
6052    /// +---+----------+
6053    /// ```
6054    #[inline]
6055    pub fn sse_por<A, B>(&mut self, op0: A, op1: B)
6056    where Assembler<'a>: SsePorEmitter<A, B> {
6057        SsePorEmitter::<A, B>::sse_por(self, op0, op1);
6058    }
6059    /// `SSE_PSADBW` (PSADBW): sum of absolute differences of unsigned bytes.
6060    ///
6061    /// Takes the absolute value of the difference between 8 unsigned byte integers of the source operand (second operand) and of the destination operand (first operand), then sums those 8 differences into an unsigned word integer result that is stored in the destination operand. Figure 4-14 shows how PSADBW operates on 64-bit operands.
6062    ///
6063    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
6064    ///
6065    /// `A` and `B` must satisfy `Assembler<'a>: SsePsadbwEmitter<A, B>`; supported operand variants:
6066    ///
6067    /// ```text
6068    /// +---+----------+
6069    /// | # | Operands |
6070    /// +---+----------+
6071    /// | 1 | Xmm, Mem |
6072    /// | 2 | Xmm, Xmm |
6073    /// +---+----------+
6074    /// ```
6075    #[inline]
6076    pub fn sse_psadbw<A, B>(&mut self, op0: A, op1: B)
6077    where Assembler<'a>: SsePsadbwEmitter<A, B> {
6078        SsePsadbwEmitter::<A, B>::sse_psadbw(self, op0, op1);
6079    }
6080    /// `SSE_PSHUFD` (PSHUFD): shuffles doublewords according to an order operand.
6081    ///
6082    /// Doublewords are copied from the source operand (second operand) into the destination operand (first operand) at the locations selected by the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Every 2-bit field of the order operand selects the contents of one doubleword location within a 128-bit lane and copies it to the target element of the destination operand. For example, bits 0 and 1 of the order operand target the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand is copied to doubleword 0 of the destination operand.
6083    ///
6084    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSHUFD.html).
6085    ///
6086    /// `A`, `B` and `C` must satisfy `Assembler<'a>: SsePshufdEmitter<A, B, C>`; supported operand variants:
6087    ///
6088    /// ```text
6089    /// +---+---------------+
6090    /// | # | Operands      |
6091    /// +---+---------------+
6092    /// | 1 | Xmm, Mem, Imm |
6093    /// | 2 | Xmm, Xmm, Imm |
6094    /// +---+---------------+
6095    /// ```
6096    #[inline]
6097    pub fn sse_pshufd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
6098    where Assembler<'a>: SsePshufdEmitter<A, B, C> {
6099        SsePshufdEmitter::<A, B, C>::sse_pshufd(self, op0, op1, op2);
6100    }
6101    /// `SSE_PSHUFHW` (PSHUFHW): shuffles the words of the high quadword according to an immediate.
6102    ///
6103    /// Words are copied from the high quadword of a 128-bit lane of the source operand into the high quadword of the destination operand, at word locations (of the respective lane) selected by the immediate operand. The 256-bit operation is similar to the in-lane operation used by the 256-bit VPSHUFD instruction, which is illustrated in Figure 4-16; for 128-bit operation, only the low 128-bit lane is operative. Every 2-bit field of the immediate operand selects the contents of one word location in the high quadword of the destination operand, and its binary encoding picks the word of the source operand's high quadword that is copied there. The low quadword of the source operand is copied to the low quadword of the destination operand, for each 128-bit lane.
6104    ///
6105    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSHUFHW.html).
6106    ///
6107    /// `A`, `B` and `C` must satisfy `Assembler<'a>: SsePshufhwEmitter<A, B, C>`; supported operand variants:
6108    ///
6109    /// ```text
6110    /// +---+---------------+
6111    /// | # | Operands      |
6112    /// +---+---------------+
6113    /// | 1 | Xmm, Mem, Imm |
6114    /// | 2 | Xmm, Xmm, Imm |
6115    /// +---+---------------+
6116    /// ```
6117    #[inline]
6118    pub fn sse_pshufhw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
6119    where Assembler<'a>: SsePshufhwEmitter<A, B, C> {
6120        SsePshufhwEmitter::<A, B, C>::sse_pshufhw(self, op0, op1, op2);
6121    }
6122    /// `SSE_PSHUFLW` (PSHUFLW): shuffles the words of the low quadword according to an immediate.
6123    ///
6124    /// Words are copied from the low quadword of a 128-bit lane of the source operand into the low quadword of the destination operand, at word locations (of the respective lane) selected by the immediate operand. The 256-bit operation is similar to the in-lane operation used by the 256-bit VPSHUFD instruction, which is illustrated in Figure 4-16; for 128-bit operation, only the low 128-bit lane is operative. Every 2-bit field of the immediate operand selects the contents of one word location in the low quadword of the destination operand, and its binary encoding picks the word (0, 1, 2 or 3) of the source operand's low quadword that is copied there. The high quadword of the source operand is copied to the high quadword of the destination operand, for each 128-bit lane.
6125    ///
6126    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSHUFLW.html).
6127    ///
6128    /// `A`, `B` and `C` must satisfy `Assembler<'a>: SsePshuflwEmitter<A, B, C>`; supported operand variants:
6129    ///
6130    /// ```text
6131    /// +---+---------------+
6132    /// | # | Operands      |
6133    /// +---+---------------+
6134    /// | 1 | Xmm, Mem, Imm |
6135    /// | 2 | Xmm, Xmm, Imm |
6136    /// +---+---------------+
6137    /// ```
6138    #[inline]
6139    pub fn sse_pshuflw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
6140    where Assembler<'a>: SsePshuflwEmitter<A, B, C> {
6141        SsePshuflwEmitter::<A, B, C>::sse_pshuflw(self, op0, op1, op2);
6142    }
6143    /// `SSE_PSLLD` (PSLLD): left shift of packed data elements, clearing the vacated low-order bits.
6144    ///
6145    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted left by the number of bits given in the count operand (second operand). The low-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
6146    ///
6147    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
6148    ///
6149    /// `A` and `B` must satisfy `Assembler<'a>: SsePslldEmitter<A, B>`; supported operand variants:
6150    ///
6151    /// ```text
6152    /// +---+----------+
6153    /// | # | Operands |
6154    /// +---+----------+
6155    /// | 1 | Xmm, Imm |
6156    /// | 2 | Xmm, Mem |
6157    /// | 3 | Xmm, Xmm |
6158    /// +---+----------+
6159    /// ```
6160    #[inline]
6161    pub fn sse_pslld<A, B>(&mut self, op0: A, op1: B)
6162    where Assembler<'a>: SsePslldEmitter<A, B> {
6163        SsePslldEmitter::<A, B>::sse_pslld(self, op0, op1);
6164    }
6165    /// `SSE_PSLLDQ` (PSLLDQ): byte-wise left shift of the destination operand.
6166    ///
6167    /// The destination operand (first operand) is shifted left by the number of bytes given in the count operand (second operand), which is an 8-bit immediate. The vacated low-order bytes are cleared (set to all 0s). When the count is greater than 15, the destination operand is set to all 0s.
6168    ///
6169    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSLLDQ.html).
6170    ///
6171    /// `A` and `B` must satisfy `Assembler<'a>: SsePslldqEmitter<A, B>`; supported operand variants:
6172    ///
6173    /// ```text
6174    /// +---+----------+
6175    /// | # | Operands |
6176    /// +---+----------+
6177    /// | 1 | Xmm, Imm |
6178    /// +---+----------+
6179    /// ```
6180    #[inline]
6181    pub fn sse_pslldq<A, B>(&mut self, op0: A, op1: B)
6182    where Assembler<'a>: SsePslldqEmitter<A, B> {
6183        SsePslldqEmitter::<A, B>::sse_pslldq(self, op0, op1);
6184    }
6185    /// `SSE_PSLLQ` (PSLLQ): left shift of packed data elements, clearing the vacated low-order bits.
6186    ///
6187    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted left by the number of bits given in the count operand (second operand). The low-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
6188    ///
6189    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
6190    ///
6191    /// `A` and `B` must satisfy `Assembler<'a>: SsePsllqEmitter<A, B>`; supported operand variants:
6192    ///
6193    /// ```text
6194    /// +---+----------+
6195    /// | # | Operands |
6196    /// +---+----------+
6197    /// | 1 | Xmm, Imm |
6198    /// | 2 | Xmm, Mem |
6199    /// | 3 | Xmm, Xmm |
6200    /// +---+----------+
6201    /// ```
6202    #[inline]
6203    pub fn sse_psllq<A, B>(&mut self, op0: A, op1: B)
6204    where Assembler<'a>: SsePsllqEmitter<A, B> {
6205        SsePsllqEmitter::<A, B>::sse_psllq(self, op0, op1);
6206    }
6207    /// `SSE_PSLLW` (PSLLW): left shift of packed data elements, clearing the vacated low-order bits.
6208    ///
6209    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted left by the number of bits given in the count operand (second operand). The low-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-17 gives an example of shifting words in a 64-bit operand.
6210    ///
6211    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSLLW%3APSLLD%3APSLLQ.html).
6212    ///
6213    /// `A` and `B` must satisfy `Assembler<'a>: SsePsllwEmitter<A, B>`; supported operand variants:
6214    ///
6215    /// ```text
6216    /// +---+----------+
6217    /// | # | Operands |
6218    /// +---+----------+
6219    /// | 1 | Xmm, Imm |
6220    /// | 2 | Xmm, Mem |
6221    /// | 3 | Xmm, Xmm |
6222    /// +---+----------+
6223    /// ```
6224    #[inline]
6225    pub fn sse_psllw<A, B>(&mut self, op0: A, op1: B)
6226    where Assembler<'a>: SsePsllwEmitter<A, B> {
6227        SsePsllwEmitter::<A, B>::sse_psllw(self, op0, op1);
6228    }
6229    /// `SSE_PSRAD` (PSRAD): right shift of packed data elements, filling with the sign bit.
6230    ///
6231    /// The bits of the individual data elements (words, doublewords or quadwords) of the destination operand (first operand) are shifted right by the number of bits given in the count operand (second operand). The high-order bits vacated by the shift are filled with the initial value of the data element's sign bit. When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for quadwords), each destination data element is filled with the initial value of the element's sign bit. (Figure 4-18 gives an example of shifting words in a 64-bit operand.)
6232    ///
6233    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRAW%3APSRAD%3APSRAQ.html).
6234    ///
6235    /// `A` and `B` must satisfy `Assembler<'a>: SsePsradEmitter<A, B>`; supported operand variants:
6236    ///
6237    /// ```text
6238    /// +---+----------+
6239    /// | # | Operands |
6240    /// +---+----------+
6241    /// | 1 | Xmm, Imm |
6242    /// | 2 | Xmm, Mem |
6243    /// | 3 | Xmm, Xmm |
6244    /// +---+----------+
6245    /// ```
6246    #[inline]
6247    pub fn sse_psrad<A, B>(&mut self, op0: A, op1: B)
6248    where Assembler<'a>: SsePsradEmitter<A, B> {
6249        SsePsradEmitter::<A, B>::sse_psrad(self, op0, op1);
6250    }
6251    /// `SSE_PSRAW` (PSRAW): right shift of packed data elements, filling with the sign bit.
6252    ///
6253    /// The bits of the individual data elements (words, doublewords or quadwords) of the destination operand (first operand) are shifted right by the number of bits given in the count operand (second operand). The high-order bits vacated by the shift are filled with the initial value of the data element's sign bit. When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for quadwords), each destination data element is filled with the initial value of the element's sign bit. (Figure 4-18 gives an example of shifting words in a 64-bit operand.)
6254    ///
6255    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRAW%3APSRAD%3APSRAQ.html).
6256    ///
6257    /// `A` and `B` must satisfy `Assembler<'a>: SsePsrawEmitter<A, B>`; supported operand variants:
6258    ///
6259    /// ```text
6260    /// +---+----------+
6261    /// | # | Operands |
6262    /// +---+----------+
6263    /// | 1 | Xmm, Imm |
6264    /// | 2 | Xmm, Mem |
6265    /// | 3 | Xmm, Xmm |
6266    /// +---+----------+
6267    /// ```
6268    #[inline]
6269    pub fn sse_psraw<A, B>(&mut self, op0: A, op1: B)
6270    where Assembler<'a>: SsePsrawEmitter<A, B> {
6271        SsePsrawEmitter::<A, B>::sse_psraw(self, op0, op1);
6272    }
6273    /// `SSE_PSRLD` (PSRLD): right shift of packed data elements, clearing the vacated high-order bits.
6274    ///
6275    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted right by the number of bits given in the count operand (second operand). The high-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-19 gives an example of shifting words in a 64-bit operand.
6276    ///
6277    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
6278    ///
6279    /// `A` and `B` must satisfy `Assembler<'a>: SsePsrldEmitter<A, B>`; supported operand variants:
6280    ///
6281    /// ```text
6282    /// +---+----------+
6283    /// | # | Operands |
6284    /// +---+----------+
6285    /// | 1 | Xmm, Imm |
6286    /// | 2 | Xmm, Mem |
6287    /// | 3 | Xmm, Xmm |
6288    /// +---+----------+
6289    /// ```
6290    #[inline]
6291    pub fn sse_psrld<A, B>(&mut self, op0: A, op1: B)
6292    where Assembler<'a>: SsePsrldEmitter<A, B> {
6293        SsePsrldEmitter::<A, B>::sse_psrld(self, op0, op1);
6294    }
6295    /// `SSE_PSRLDQ` (PSRLDQ): byte-wise right shift of the destination operand.
6296    ///
6297    /// The destination operand (first operand) is shifted right by the number of bytes given in the count operand (second operand), which is an 8-bit immediate. The vacated high-order bytes are cleared (set to all 0s). When the count is greater than 15, the destination operand is set to all 0s.
6298    ///
6299    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRLDQ.html).
6300    ///
6301    /// `A` and `B` must satisfy `Assembler<'a>: SsePsrldqEmitter<A, B>`; supported operand variants:
6302    ///
6303    /// ```text
6304    /// +---+----------+
6305    /// | # | Operands |
6306    /// +---+----------+
6307    /// | 1 | Xmm, Imm |
6308    /// +---+----------+
6309    /// ```
6310    #[inline]
6311    pub fn sse_psrldq<A, B>(&mut self, op0: A, op1: B)
6312    where Assembler<'a>: SsePsrldqEmitter<A, B> {
6313        SsePsrldqEmitter::<A, B>::sse_psrldq(self, op0, op1);
6314    }
6315    /// `SSE_PSRLQ` (PSRLQ): right shift of packed data elements, clearing the vacated high-order bits.
6316    ///
6317    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted right by the number of bits given in the count operand (second operand). The high-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-19 gives an example of shifting words in a 64-bit operand.
6318    ///
6319    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
6320    ///
6321    /// `A` and `B` must satisfy `Assembler<'a>: SsePsrlqEmitter<A, B>`; supported operand variants:
6322    ///
6323    /// ```text
6324    /// +---+----------+
6325    /// | # | Operands |
6326    /// +---+----------+
6327    /// | 1 | Xmm, Imm |
6328    /// | 2 | Xmm, Mem |
6329    /// | 3 | Xmm, Xmm |
6330    /// +---+----------+
6331    /// ```
6332    #[inline]
6333    pub fn sse_psrlq<A, B>(&mut self, op0: A, op1: B)
6334    where Assembler<'a>: SsePsrlqEmitter<A, B> {
6335        SsePsrlqEmitter::<A, B>::sse_psrlq(self, op0, op1);
6336    }
6337    /// `SSE_PSRLW` (PSRLW): right shift of packed data elements, clearing the vacated high-order bits.
6338    ///
6339    /// The bits of the individual data elements (words, doublewords, or quadword) of the destination operand (first operand) are shifted right by the number of bits given in the count operand (second operand). The high-order bits vacated by the shift are cleared (set to 0). When the count is greater than 15 (for words), 31 (for doublewords), or 63 (for a quadword), the destination operand is set to all 0s. Figure 4-19 gives an example of shifting words in a 64-bit operand.
6340    ///
6341    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSRLW%3APSRLD%3APSRLQ.html).
6342    ///
6343    /// `A` and `B` must satisfy `Assembler<'a>: SsePsrlwEmitter<A, B>`; supported operand variants:
6344    ///
6345    /// ```text
6346    /// +---+----------+
6347    /// | # | Operands |
6348    /// +---+----------+
6349    /// | 1 | Xmm, Imm |
6350    /// | 2 | Xmm, Mem |
6351    /// | 3 | Xmm, Xmm |
6352    /// +---+----------+
6353    /// ```
6354    #[inline]
6355    pub fn sse_psrlw<A, B>(&mut self, op0: A, op1: B)
6356    where Assembler<'a>: SsePsrlwEmitter<A, B> {
6357        SsePsrlwEmitter::<A, B>::sse_psrlw(self, op0, op1);
6358    }
6359    /// `SSE_PSUBB` (PSUBB): packed integer subtraction with wraparound on overflow.
6360    ///
6361    /// A SIMD subtract is performed: the packed integers of the source operand (second operand) are subtracted from the packed integers of the destination operand (first operand), and the packed integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with wraparound.
6362    ///
6363    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
6364    ///
6365    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubbEmitter<A, B>`; supported operand variants:
6366    ///
6367    /// ```text
6368    /// +---+----------+
6369    /// | # | Operands |
6370    /// +---+----------+
6371    /// | 1 | Xmm, Mem |
6372    /// | 2 | Xmm, Xmm |
6373    /// +---+----------+
6374    /// ```
6375    #[inline]
6376    pub fn sse_psubb<A, B>(&mut self, op0: A, op1: B)
6377    where Assembler<'a>: SsePsubbEmitter<A, B> {
6378        SsePsubbEmitter::<A, B>::sse_psubb(self, op0, op1);
6379    }
6380    /// `SSE_PSUBD` (PSUBD): packed integer subtraction with wraparound on overflow.
6381    ///
6382    /// A SIMD subtract is performed: the packed integers of the source operand (second operand) are subtracted from the packed integers of the destination operand (first operand), and the packed integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with wraparound.
6383    ///
6384    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
6385    ///
6386    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubdEmitter<A, B>`; supported operand variants:
6387    ///
6388    /// ```text
6389    /// +---+----------+
6390    /// | # | Operands |
6391    /// +---+----------+
6392    /// | 1 | Xmm, Mem |
6393    /// | 2 | Xmm, Xmm |
6394    /// +---+----------+
6395    /// ```
6396    #[inline]
6397    pub fn sse_psubd<A, B>(&mut self, op0: A, op1: B)
6398    where Assembler<'a>: SsePsubdEmitter<A, B> {
6399        SsePsubdEmitter::<A, B>::sse_psubd(self, op0, op1);
6400    }
6401    /// `SSE_PSUBQ` (PSUBQ): quadword subtraction with wraparound on overflow.
6402    ///
6403    /// The second operand (source operand) is subtracted from the first operand (destination operand) and the result is stored in the destination operand. With packed quadword operands, a SIMD subtract is performed. When a quadword result is too large to be represented in 64 bits (overflow), it wraps around and the low 64 bits are written to the destination element (that is, the carry is ignored).
6404    ///
6405    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBQ.html).
6406    ///
6407    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubqEmitter<A, B>`; supported operand variants:
6408    ///
6409    /// ```text
6410    /// +---+----------+
6411    /// | # | Operands |
6412    /// +---+----------+
6413    /// | 1 | Xmm, Mem |
6414    /// | 2 | Xmm, Xmm |
6415    /// +---+----------+
6416    /// ```
6417    #[inline]
6418    pub fn sse_psubq<A, B>(&mut self, op0: A, op1: B)
6419    where Assembler<'a>: SsePsubqEmitter<A, B> {
6420        SsePsubqEmitter::<A, B>::sse_psubq(self, op0, op1);
6421    }
6422    /// `SSE_PSUBSB` (PSUBSB): packed signed integer subtraction with signed saturation.
6423    ///
6424    /// A SIMD subtract is performed: the packed signed integers of the source operand (second operand) are subtracted from the packed signed integers of the destination operand (first operand), and the packed integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with signed saturation.
6425    ///
6426    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBSB%3APSUBSW.html).
6427    ///
6428    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubsbEmitter<A, B>`; supported operand variants:
6429    ///
6430    /// ```text
6431    /// +---+----------+
6432    /// | # | Operands |
6433    /// +---+----------+
6434    /// | 1 | Xmm, Mem |
6435    /// | 2 | Xmm, Xmm |
6436    /// +---+----------+
6437    /// ```
6438    #[inline]
6439    pub fn sse_psubsb<A, B>(&mut self, op0: A, op1: B)
6440    where Assembler<'a>: SsePsubsbEmitter<A, B> {
6441        SsePsubsbEmitter::<A, B>::sse_psubsb(self, op0, op1);
6442    }
6443    /// `SSE_PSUBSW` (PSUBSW): packed signed integer subtraction with signed saturation.
6444    ///
6445    /// A SIMD subtract is performed: the packed signed integers of the source operand (second operand) are subtracted from the packed signed integers of the destination operand (first operand), and the packed integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with signed saturation.
6446    ///
6447    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBSB%3APSUBSW.html).
6448    ///
6449    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubswEmitter<A, B>`; supported operand variants:
6450    ///
6451    /// ```text
6452    /// +---+----------+
6453    /// | # | Operands |
6454    /// +---+----------+
6455    /// | 1 | Xmm, Mem |
6456    /// | 2 | Xmm, Xmm |
6457    /// +---+----------+
6458    /// ```
6459    #[inline]
6460    pub fn sse_psubsw<A, B>(&mut self, op0: A, op1: B)
6461    where Assembler<'a>: SsePsubswEmitter<A, B> {
6462        SsePsubswEmitter::<A, B>::sse_psubsw(self, op0, op1);
6463    }
6464    /// `SSE_PSUBUSB` (PSUBUSB): packed unsigned integer subtraction with unsigned saturation.
6465    ///
6466    /// A SIMD subtract is performed: the packed unsigned integers of the source operand (second operand) are subtracted from the packed unsigned integers of the destination operand (first operand), and the packed unsigned integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with unsigned saturation.
6467    ///
6468    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBUSB%3APSUBUSW.html).
6469    ///
6470    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubusbEmitter<A, B>`; supported operand variants:
6471    ///
6472    /// ```text
6473    /// +---+----------+
6474    /// | # | Operands |
6475    /// +---+----------+
6476    /// | 1 | Xmm, Mem |
6477    /// | 2 | Xmm, Xmm |
6478    /// +---+----------+
6479    /// ```
6480    #[inline]
6481    pub fn sse_psubusb<A, B>(&mut self, op0: A, op1: B)
6482    where Assembler<'a>: SsePsubusbEmitter<A, B> {
6483        SsePsubusbEmitter::<A, B>::sse_psubusb(self, op0, op1);
6484    }
6485    /// `SSE_PSUBUSW` (PSUBUSW): packed unsigned integer subtraction with unsigned saturation.
6486    ///
6487    /// A SIMD subtract is performed: the packed unsigned integers of the source operand (second operand) are subtracted from the packed unsigned integers of the destination operand (first operand), and the packed unsigned integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with unsigned saturation.
6488    ///
6489    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBUSB%3APSUBUSW.html).
6490    ///
6491    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubuswEmitter<A, B>`; supported operand variants:
6492    ///
6493    /// ```text
6494    /// +---+----------+
6495    /// | # | Operands |
6496    /// +---+----------+
6497    /// | 1 | Xmm, Mem |
6498    /// | 2 | Xmm, Xmm |
6499    /// +---+----------+
6500    /// ```
6501    #[inline]
6502    pub fn sse_psubusw<A, B>(&mut self, op0: A, op1: B)
6503    where Assembler<'a>: SsePsubuswEmitter<A, B> {
6504        SsePsubuswEmitter::<A, B>::sse_psubusw(self, op0, op1);
6505    }
6506    /// `SSE_PSUBW` (PSUBW): packed integer subtraction with wraparound on overflow.
6507    ///
6508    /// A SIMD subtract is performed: the packed integers of the source operand (second operand) are subtracted from the packed integers of the destination operand (first operand), and the packed integer results are stored in the destination operand. Figure 9-4 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, illustrates a SIMD operation. Overflow is handled with wraparound.
6509    ///
6510    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSUBB%3APSUBW%3APSUBD.html).
6511    ///
6512    /// `A` and `B` must satisfy `Assembler<'a>: SsePsubwEmitter<A, B>`; supported operand variants:
6513    ///
6514    /// ```text
6515    /// +---+----------+
6516    /// | # | Operands |
6517    /// +---+----------+
6518    /// | 1 | Xmm, Mem |
6519    /// | 2 | Xmm, Xmm |
6520    /// +---+----------+
6521    /// ```
6522    #[inline]
6523    pub fn sse_psubw<A, B>(&mut self, op0: A, op1: B)
6524    where Assembler<'a>: SsePsubwEmitter<A, B> {
6525        SsePsubwEmitter::<A, B>::sse_psubw(self, op0, op1);
6526    }
6527    /// `SSE_PUNPCKHBW` (PUNPCKHBW): interleaves the high-order data elements of destination and source.
6528    ///
6529    /// The high-order data elements (bytes, words, doublewords, or quadwords) of the destination operand (first operand) and of the source operand (second operand) are unpacked and interleaved into the destination operand; the low-order data elements are ignored. Figure 4-20 shows the unpack operation for bytes in 64-bit operands.
6530    ///
6531    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
6532    ///
6533    /// `A` and `B` must satisfy `Assembler<'a>: SsePunpckhbwEmitter<A, B>`; supported operand variants:
6534    ///
6535    /// ```text
6536    /// +---+----------+
6537    /// | # | Operands |
6538    /// +---+----------+
6539    /// | 1 | Xmm, Mem |
6540    /// | 2 | Xmm, Xmm |
6541    /// +---+----------+
6542    /// ```
6543    #[inline]
6544    pub fn sse_punpckhbw<A, B>(&mut self, op0: A, op1: B)
6545    where Assembler<'a>: SsePunpckhbwEmitter<A, B> {
6546        SsePunpckhbwEmitter::<A, B>::sse_punpckhbw(self, op0, op1);
6547    }
6548    /// `SSE_PUNPCKHDQ` (PUNPCKHDQ): interleaves the high-order data elements of destination and source.
6549    ///
6550    /// The high-order data elements (bytes, words, doublewords, or quadwords) of the destination operand (first operand) and of the source operand (second operand) are unpacked and interleaved into the destination operand; the low-order data elements are ignored. Figure 4-20 shows the unpack operation for bytes in 64-bit operands.
6551    ///
6552    /// Reference: [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
6553    ///
6554    /// `A` and `B` must satisfy `Assembler<'a>: SsePunpckhdqEmitter<A, B>`; supported operand variants:
6555    ///
6556    /// ```text
6557    /// +---+----------+
6558    /// | # | Operands |
6559    /// +---+----------+
6560    /// | 1 | Xmm, Mem |
6561    /// | 2 | Xmm, Xmm |
6562    /// +---+----------+
6563    /// ```
6564    #[inline]
6565    pub fn sse_punpckhdq<A, B>(&mut self, op0: A, op1: B)
6566    where Assembler<'a>: SsePunpckhdqEmitter<A, B> {
6567        SsePunpckhdqEmitter::<A, B>::sse_punpckhdq(self, op0, op1);
6568    }
6569    /// `SSE_PUNPCKHQDQ` (PUNPCKHQDQ). 
6570    /// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. Figure 4-20 shows the unpack operation for bytes in 64-bit operands. The low-order data elements are ignored.
6571    ///
6572    ///
6573    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
6574    ///
6575    /// Supported operand variants:
6576    ///
6577    /// ```text
6578    /// +---+----------+
6579    /// | # | Operands |
6580    /// +---+----------+
6581    /// | 1 | Xmm, Mem |
6582    /// | 2 | Xmm, Xmm |
6583    /// +---+----------+
6584    /// ```
6585    #[inline]
6586    pub fn sse_punpckhqdq<A, B>(&mut self, op0: A, op1: B)
6587    where Assembler<'a>: SsePunpckhqdqEmitter<A, B> {
6588        <Self as SsePunpckhqdqEmitter<A, B>>::sse_punpckhqdq(self, op0, op1);
6589    }
6590    /// `SSE_PUNPCKHWD` (PUNPCKHWD). 
6591    /// Unpacks and interleaves the high-order data elements (bytes, words, doublewords, or quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. Figure 4-20 shows the unpack operation for bytes in 64-bit operands. The low-order data elements are ignored.
6592    ///
6593    ///
6594    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKHBW%3APUNPCKHWD%3APUNPCKHDQ%3APUNPCKHQDQ.html).
6595    ///
6596    /// Supported operand variants:
6597    ///
6598    /// ```text
6599    /// +---+----------+
6600    /// | # | Operands |
6601    /// +---+----------+
6602    /// | 1 | Xmm, Mem |
6603    /// | 2 | Xmm, Xmm |
6604    /// +---+----------+
6605    /// ```
6606    #[inline]
6607    pub fn sse_punpckhwd<A, B>(&mut self, op0: A, op1: B)
6608    where Assembler<'a>: SsePunpckhwdEmitter<A, B> {
6609        <Self as SsePunpckhwdEmitter<A, B>>::sse_punpckhwd(self, op0, op1);
6610    }
6611    /// `SSE_PUNPCKLBW` (PUNPCKLBW). 
6612    /// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. (Figure 4-22 shows the unpack operation for bytes in 64-bit operands.). The high-order data elements are ignored.
6613    ///
6614    ///
6615    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
6616    ///
6617    /// Supported operand variants:
6618    ///
6619    /// ```text
6620    /// +---+----------+
6621    /// | # | Operands |
6622    /// +---+----------+
6623    /// | 1 | Xmm, Mem |
6624    /// | 2 | Xmm, Xmm |
6625    /// +---+----------+
6626    /// ```
6627    #[inline]
6628    pub fn sse_punpcklbw<A, B>(&mut self, op0: A, op1: B)
6629    where Assembler<'a>: SsePunpcklbwEmitter<A, B> {
6630        <Self as SsePunpcklbwEmitter<A, B>>::sse_punpcklbw(self, op0, op1);
6631    }
6632    /// `SSE_PUNPCKLDQ` (PUNPCKLDQ). 
6633    /// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. (Figure 4-22 shows the unpack operation for bytes in 64-bit operands.). The high-order data elements are ignored.
6634    ///
6635    ///
6636    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
6637    ///
6638    /// Supported operand variants:
6639    ///
6640    /// ```text
6641    /// +---+----------+
6642    /// | # | Operands |
6643    /// +---+----------+
6644    /// | 1 | Xmm, Mem |
6645    /// | 2 | Xmm, Xmm |
6646    /// +---+----------+
6647    /// ```
6648    #[inline]
6649    pub fn sse_punpckldq<A, B>(&mut self, op0: A, op1: B)
6650    where Assembler<'a>: SsePunpckldqEmitter<A, B> {
6651        <Self as SsePunpckldqEmitter<A, B>>::sse_punpckldq(self, op0, op1);
6652    }
6653    /// `SSE_PUNPCKLQDQ` (PUNPCKLQDQ). 
6654    /// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. (Figure 4-22 shows the unpack operation for bytes in 64-bit operands.). The high-order data elements are ignored.
6655    ///
6656    ///
6657    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
6658    ///
6659    /// Supported operand variants:
6660    ///
6661    /// ```text
6662    /// +---+----------+
6663    /// | # | Operands |
6664    /// +---+----------+
6665    /// | 1 | Xmm, Mem |
6666    /// | 2 | Xmm, Xmm |
6667    /// +---+----------+
6668    /// ```
6669    #[inline]
6670    pub fn sse_punpcklqdq<A, B>(&mut self, op0: A, op1: B)
6671    where Assembler<'a>: SsePunpcklqdqEmitter<A, B> {
6672        <Self as SsePunpcklqdqEmitter<A, B>>::sse_punpcklqdq(self, op0, op1);
6673    }
6674    /// `SSE_PUNPCKLWD` (PUNPCKLWD). 
6675    /// Unpacks and interleaves the low-order data elements (bytes, words, doublewords, and quadwords) of the destination operand (first operand) and source operand (second operand) into the destination operand. (Figure 4-22 shows the unpack operation for bytes in 64-bit operands.). The high-order data elements are ignored.
6676    ///
6677    ///
6678    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PUNPCKLBW%3APUNPCKLWD%3APUNPCKLDQ%3APUNPCKLQDQ.html).
6679    ///
6680    /// Supported operand variants:
6681    ///
6682    /// ```text
6683    /// +---+----------+
6684    /// | # | Operands |
6685    /// +---+----------+
6686    /// | 1 | Xmm, Mem |
6687    /// | 2 | Xmm, Xmm |
6688    /// +---+----------+
6689    /// ```
6690    #[inline]
6691    pub fn sse_punpcklwd<A, B>(&mut self, op0: A, op1: B)
6692    where Assembler<'a>: SsePunpcklwdEmitter<A, B> {
6693        <Self as SsePunpcklwdEmitter<A, B>>::sse_punpcklwd(self, op0, op1);
6694    }
6695    /// `SSE_PXOR` (PXOR). 
6696    /// Performs a bitwise logical exclusive-OR (XOR) operation on the source operand (second operand) and the destination operand (first operand) and stores the result in the destination operand. Each bit of the result is 1 if the corresponding bits of the two operands are different; each bit is 0 if the corresponding bits of the operands are the same.
6697    ///
6698    ///
6699    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PXOR.html).
6700    ///
6701    /// Supported operand variants:
6702    ///
6703    /// ```text
6704    /// +---+----------+
6705    /// | # | Operands |
6706    /// +---+----------+
6707    /// | 1 | Xmm, Mem |
6708    /// | 2 | Xmm, Xmm |
6709    /// +---+----------+
6710    /// ```
6711    #[inline]
6712    pub fn sse_pxor<A, B>(&mut self, op0: A, op1: B)
6713    where Assembler<'a>: SsePxorEmitter<A, B> {
6714        <Self as SsePxorEmitter<A, B>>::sse_pxor(self, op0, op1);
6715    }
6716    /// `SSE_SHUFPD` (SHUFPD). 
6717    /// Selects a double precision floating-point value of an input pair using a bit control and move to a designated element of the destination operand. The low-to-high order of double precision element of the destination operand is interleaved between the first source operand and the second source operand at the granularity of input pair of 128 bits. Each bit in the imm8 byte, starting from bit 0, is the select control of the corresponding element of the destination to received the shuffled result of an input pair.
6718    ///
6719    ///
6720    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SHUFPD.html).
6721    ///
6722    /// Supported operand variants:
6723    ///
6724    /// ```text
6725    /// +---+---------------+
6726    /// | # | Operands      |
6727    /// +---+---------------+
6728    /// | 1 | Xmm, Mem, Imm |
6729    /// | 2 | Xmm, Xmm, Imm |
6730    /// +---+---------------+
6731    /// ```
6732    #[inline]
6733    pub fn sse_shufpd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
6734    where Assembler<'a>: SseShufpdEmitter<A, B, C> {
6735        <Self as SseShufpdEmitter<A, B, C>>::sse_shufpd(self, op0, op1, op2);
6736    }
6737    /// `SSE_SQRTPD` (SQRTPD). 
6738    /// Performs a SIMD computation of the square roots of the two, four or eight packed double precision floating-point values in the source operand (the second operand) stores the packed double precision floating-point results in the destination operand (the first operand).
6739    ///
6740    ///
6741    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTPD.html).
6742    ///
6743    /// Supported operand variants:
6744    ///
6745    /// ```text
6746    /// +---+----------+
6747    /// | # | Operands |
6748    /// +---+----------+
6749    /// | 1 | Xmm, Mem |
6750    /// | 2 | Xmm, Xmm |
6751    /// +---+----------+
6752    /// ```
6753    #[inline]
6754    pub fn sse_sqrtpd<A, B>(&mut self, op0: A, op1: B)
6755    where Assembler<'a>: SseSqrtpdEmitter<A, B> {
6756        <Self as SseSqrtpdEmitter<A, B>>::sse_sqrtpd(self, op0, op1);
6757    }
6758    /// `SSE_SQRTSD` (SQRTSD). 
6759    /// Computes the square root of the low double precision floating-point value in the second source operand and stores the double precision floating-point result in the destination operand. The second source operand can be an XMM register or a 64-bit memory location. The first source and destination operands are XMM registers.
6760    ///
6761    ///
6762    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTSD.html).
6763    ///
6764    /// Supported operand variants:
6765    ///
6766    /// ```text
6767    /// +---+----------+
6768    /// | # | Operands |
6769    /// +---+----------+
6770    /// | 1 | Xmm, Mem |
6771    /// | 2 | Xmm, Xmm |
6772    /// +---+----------+
6773    /// ```
6774    #[inline]
6775    pub fn sse_sqrtsd<A, B>(&mut self, op0: A, op1: B)
6776    where Assembler<'a>: SseSqrtsdEmitter<A, B> {
6777        <Self as SseSqrtsdEmitter<A, B>>::sse_sqrtsd(self, op0, op1);
6778    }
6779    /// `SSE_SUBPD` (SUBPD). 
6780    /// Performs a SIMD subtract of the two, four or eight packed double precision floating-point values of the second Source operand from the first Source operand, and stores the packed double precision floating-point results in the destination operand.
6781    ///
6782    ///
6783    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBPD.html).
6784    ///
6785    /// Supported operand variants:
6786    ///
6787    /// ```text
6788    /// +---+----------+
6789    /// | # | Operands |
6790    /// +---+----------+
6791    /// | 1 | Xmm, Mem |
6792    /// | 2 | Xmm, Xmm |
6793    /// +---+----------+
6794    /// ```
6795    #[inline]
6796    pub fn sse_subpd<A, B>(&mut self, op0: A, op1: B)
6797    where Assembler<'a>: SseSubpdEmitter<A, B> {
6798        <Self as SseSubpdEmitter<A, B>>::sse_subpd(self, op0, op1);
6799    }
6800    /// `SSE_SUBSD` (SUBSD). 
6801    /// Subtract the low double precision floating-point value in the second source operand from the first source operand and stores the double precision floating-point result in the low quadword of the destination operand.
6802    ///
6803    ///
6804    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBSD.html).
6805    ///
6806    /// Supported operand variants:
6807    ///
6808    /// ```text
6809    /// +---+----------+
6810    /// | # | Operands |
6811    /// +---+----------+
6812    /// | 1 | Xmm, Mem |
6813    /// | 2 | Xmm, Xmm |
6814    /// +---+----------+
6815    /// ```
6816    #[inline]
6817    pub fn sse_subsd<A, B>(&mut self, op0: A, op1: B)
6818    where Assembler<'a>: SseSubsdEmitter<A, B> {
6819        <Self as SseSubsdEmitter<A, B>>::sse_subsd(self, op0, op1);
6820    }
6821    /// `SSE_UCOMISD` (UCOMISD). 
6822    /// Performs an unordered compare of the double precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
6823    ///
6824    ///
6825    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UCOMISD.html).
6826    ///
6827    /// Supported operand variants:
6828    ///
6829    /// ```text
6830    /// +---+----------+
6831    /// | # | Operands |
6832    /// +---+----------+
6833    /// | 1 | Xmm, Mem |
6834    /// | 2 | Xmm, Xmm |
6835    /// +---+----------+
6836    /// ```
6837    #[inline]
6838    pub fn sse_ucomisd<A, B>(&mut self, op0: A, op1: B)
6839    where Assembler<'a>: SseUcomisdEmitter<A, B> {
6840        <Self as SseUcomisdEmitter<A, B>>::sse_ucomisd(self, op0, op1);
6841    }
6842    /// `SSE_UNPCKHPD` (UNPCKHPD). 
6843    /// Performs an interleaved unpack of the high double precision floating-point values from the first source operand and the second source operand. See Figure 4-15 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 2B.
6844    ///
6845    ///
6846    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPD.html).
6847    ///
6848    /// Supported operand variants:
6849    ///
6850    /// ```text
6851    /// +---+----------+
6852    /// | # | Operands |
6853    /// +---+----------+
6854    /// | 1 | Xmm, Mem |
6855    /// | 2 | Xmm, Xmm |
6856    /// +---+----------+
6857    /// ```
6858    #[inline]
6859    pub fn sse_unpckhpd<A, B>(&mut self, op0: A, op1: B)
6860    where Assembler<'a>: SseUnpckhpdEmitter<A, B> {
6861        <Self as SseUnpckhpdEmitter<A, B>>::sse_unpckhpd(self, op0, op1);
6862    }
6863    /// `SSE_UNPCKLPD` (UNPCKLPD). 
6864    /// Performs an interleaved unpack of the low double precision floating-point values from the first source operand and the second source operand.
6865    ///
6866    ///
6867    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPD.html).
6868    ///
6869    /// Supported operand variants:
6870    ///
6871    /// ```text
6872    /// +---+----------+
6873    /// | # | Operands |
6874    /// +---+----------+
6875    /// | 1 | Xmm, Mem |
6876    /// | 2 | Xmm, Xmm |
6877    /// +---+----------+
6878    /// ```
6879    #[inline]
6880    pub fn sse_unpcklpd<A, B>(&mut self, op0: A, op1: B)
6881    where Assembler<'a>: SseUnpcklpdEmitter<A, B> {
6882        <Self as SseUnpcklpdEmitter<A, B>>::sse_unpcklpd(self, op0, op1);
6883    }
6884    /// `SSE_XORPD` (XORPD). 
6885    /// Performs a bitwise logical XOR of the two, four or eight packed double precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
6886    ///
6887    ///
6888    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/XORPD.html).
6889    ///
6890    /// Supported operand variants:
6891    ///
6892    /// ```text
6893    /// +---+----------+
6894    /// | # | Operands |
6895    /// +---+----------+
6896    /// | 1 | Xmm, Mem |
6897    /// | 2 | Xmm, Xmm |
6898    /// +---+----------+
6899    /// ```
6900    #[inline]
6901    pub fn sse_xorpd<A, B>(&mut self, op0: A, op1: B)
6902    where Assembler<'a>: SseXorpdEmitter<A, B> {
6903        <Self as SseXorpdEmitter<A, B>>::sse_xorpd(self, op0, op1);
6904    }
6905}
asmkit/x86/features/SSE2.rs

asmkit/x86/features/
SSE2.rs