Skip to main content

asmkit/x86/features/
SSE.rs

1use crate::x86::assembler::*;
2use crate::x86::operands::*;
3use super::super::opcodes::*;
4use crate::core::emitter::*;
5use crate::core::operand::*;
6
7/// A dummy operand that represents no register. Here just for simplicity.
8const NOREG: Operand = Operand::new();
9
10/// `LDMXCSR` (LDMXCSR). 
11/// Loads the source operand into the MXCSR control/status register. The source operand is a 32-bit memory location. See “MXCSR Control and Status Register” in Chapter 10, of the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for a description of the MXCSR register and its contents.
12///
13///
14/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
15///
16/// Supported operand variants:
17///
18/// ```text
19/// +---+----------+
20/// | # | Operands |
21/// +---+----------+
22/// | 1 | Mem      |
23/// +---+----------+
24/// ```
25pub trait LdmxcsrEmitter<A> {
26    fn ldmxcsr(&mut self, op0: A);
27}
28
29impl<'a> LdmxcsrEmitter<Mem> for Assembler<'a> {
30    fn ldmxcsr(&mut self, op0: Mem) {
31        self.emit(LDMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
32    }
33}
34
35/// `MMX_MASKMOVQ` (MASKMOVQ). 
36/// Stores selected bytes from the source operand (first operand) into a 64-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are MMX technology registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
37///
38///
39/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVQ.html).
40///
41/// Supported operand variants:
42///
43/// ```text
44/// +---+----------+
45/// | # | Operands |
46/// +---+----------+
47/// | 1 | Mm, Mm   |
48/// +---+----------+
49/// ```
50pub trait MmxMaskmovqEmitter<A, B> {
51    fn mmx_maskmovq(&mut self, op0: A, op1: B);
52}
53
54impl<'a> MmxMaskmovqEmitter<Mm, Mm> for Assembler<'a> {
55    fn mmx_maskmovq(&mut self, op0: Mm, op1: Mm) {
56        self.emit(MMX_MASKMOVQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
57    }
58}
59
60/// `MMX_MOVDQ2Q`.
61///
62/// Supported operand variants:
63///
64/// ```text
65/// +---+----------+
66/// | # | Operands |
67/// +---+----------+
68/// | 1 | Mm, Xmm  |
69/// +---+----------+
70/// ```
71pub trait MmxMovdq2qEmitter<A, B> {
72    fn mmx_movdq2q(&mut self, op0: A, op1: B);
73}
74
75impl<'a> MmxMovdq2qEmitter<Mm, Xmm> for Assembler<'a> {
76    fn mmx_movdq2q(&mut self, op0: Mm, op1: Xmm) {
77        self.emit(MMX_MOVDQ2QRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
78    }
79}
80
81/// `MMX_MOVNTQ` (MOVNTQ). 
82/// Moves the quadword in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to minimize cache pollution during the write to memory. The source operand is an MMX technology register, which is assumed to contain packed integer data (packed bytes, words, or doublewords). The destination operand is a 64-bit memory location.
83///
84///
85/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTQ.html).
86///
87/// Supported operand variants:
88///
89/// ```text
90/// +---+----------+
91/// | # | Operands |
92/// +---+----------+
93/// | 1 | Mem, Mm  |
94/// +---+----------+
95/// ```
96pub trait MmxMovntqEmitter<A, B> {
97    fn mmx_movntq(&mut self, op0: A, op1: B);
98}
99
100impl<'a> MmxMovntqEmitter<Mem, Mm> for Assembler<'a> {
101    fn mmx_movntq(&mut self, op0: Mem, op1: Mm) {
102        self.emit(MMX_MOVNTQMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
103    }
104}
105
106/// `MMX_MOVQ2DQ` (MOVQ2DQ). 
107/// Moves the quadword from the source operand (second operand) to the low quadword of the destination operand (first operand). The source operand is an MMX technology register and the destination operand is an XMM register.
108///
109///
110/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVQ2DQ.html).
111///
112/// Supported operand variants:
113///
114/// ```text
115/// +---+----------+
116/// | # | Operands |
117/// +---+----------+
118/// | 1 | Xmm, Mm  |
119/// +---+----------+
120/// ```
121pub trait MmxMovq2dqEmitter<A, B> {
122    fn mmx_movq2dq(&mut self, op0: A, op1: B);
123}
124
125impl<'a> MmxMovq2dqEmitter<Xmm, Mm> for Assembler<'a> {
126    fn mmx_movq2dq(&mut self, op0: Xmm, op1: Mm) {
127        self.emit(MMX_MOVQ2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
128    }
129}
130
131/// `MMX_PAVGB` (PAVGB). 
132/// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
133///
134///
135/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
136///
137/// Supported operand variants:
138///
139/// ```text
140/// +---+----------+
141/// | # | Operands |
142/// +---+----------+
143/// | 1 | Mm, Mem  |
144/// | 2 | Mm, Mm   |
145/// +---+----------+
146/// ```
147pub trait MmxPavgbEmitter<A, B> {
148    fn mmx_pavgb(&mut self, op0: A, op1: B);
149}
150
151impl<'a> MmxPavgbEmitter<Mm, Mm> for Assembler<'a> {
152    fn mmx_pavgb(&mut self, op0: Mm, op1: Mm) {
153        self.emit(MMX_PAVGBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
154    }
155}
156
157impl<'a> MmxPavgbEmitter<Mm, Mem> for Assembler<'a> {
158    fn mmx_pavgb(&mut self, op0: Mm, op1: Mem) {
159        self.emit(MMX_PAVGBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
160    }
161}
162
163/// `MMX_PAVGW` (PAVGW). 
164/// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
165///
166///
167/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
168///
169/// Supported operand variants:
170///
171/// ```text
172/// +---+----------+
173/// | # | Operands |
174/// +---+----------+
175/// | 1 | Mm, Mem  |
176/// | 2 | Mm, Mm   |
177/// +---+----------+
178/// ```
179pub trait MmxPavgwEmitter<A, B> {
180    fn mmx_pavgw(&mut self, op0: A, op1: B);
181}
182
183impl<'a> MmxPavgwEmitter<Mm, Mm> for Assembler<'a> {
184    fn mmx_pavgw(&mut self, op0: Mm, op1: Mm) {
185        self.emit(MMX_PAVGWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
186    }
187}
188
189impl<'a> MmxPavgwEmitter<Mm, Mem> for Assembler<'a> {
190    fn mmx_pavgw(&mut self, op0: Mm, op1: Mem) {
191        self.emit(MMX_PAVGWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
192    }
193}
194
195/// `MMX_PEXTRW` (PEXTRW). 
196/// Copies the word in the source operand (second operand) specified by the count operand (third operand) to the destination operand (first operand). The source operand can be an MMX technology register or an XMM register. The destination operand can be the low word of a general-purpose register or a 16-bit memory address. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location. The content of the destination register above bit 16 is cleared (set to all 0s).
197///
198///
199/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
200///
201/// Supported operand variants:
202///
203/// ```text
204/// +---+--------------+
205/// | # | Operands     |
206/// +---+--------------+
207/// | 1 | Gpq, Mm, Imm |
208/// +---+--------------+
209/// ```
210pub trait MmxPextrwEmitter<A, B, C> {
211    fn mmx_pextrw(&mut self, op0: A, op1: B, op2: C);
212}
213
214impl<'a> MmxPextrwEmitter<Gpq, Mm, Imm> for Assembler<'a> {
215    fn mmx_pextrw(&mut self, op0: Gpq, op1: Mm, op2: Imm) {
216        self.emit(MMX_PEXTRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
217    }
218}
219
220/// `MMX_PINSRW` (PINSRW). 
221/// Three operand MMX and SSE instructions
222///
223///
224/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
225///
226/// Supported operand variants:
227///
228/// ```text
229/// +---+--------------+
230/// | # | Operands     |
231/// +---+--------------+
232/// | 1 | Mm, Gpd, Imm |
233/// | 2 | Mm, Mem, Imm |
234/// +---+--------------+
235/// ```
236pub trait MmxPinsrwEmitter<A, B, C> {
237    fn mmx_pinsrw(&mut self, op0: A, op1: B, op2: C);
238}
239
240impl<'a> MmxPinsrwEmitter<Mm, Gpd, Imm> for Assembler<'a> {
241    fn mmx_pinsrw(&mut self, op0: Mm, op1: Gpd, op2: Imm) {
242        self.emit(MMX_PINSRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
243    }
244}
245
246impl<'a> MmxPinsrwEmitter<Mm, Mem, Imm> for Assembler<'a> {
247    fn mmx_pinsrw(&mut self, op0: Mm, op1: Mem, op2: Imm) {
248        self.emit(MMX_PINSRWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
249    }
250}
251
252/// `MMX_PMAXSW` (PMAXSW). 
253/// Performs a SIMD compare of the packed signed byte, word, dword or qword integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
254///
255///
256/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
257///
258/// Supported operand variants:
259///
260/// ```text
261/// +---+----------+
262/// | # | Operands |
263/// +---+----------+
264/// | 1 | Mm, Mem  |
265/// | 2 | Mm, Mm   |
266/// +---+----------+
267/// ```
268pub trait MmxPmaxswEmitter<A, B> {
269    fn mmx_pmaxsw(&mut self, op0: A, op1: B);
270}
271
272impl<'a> MmxPmaxswEmitter<Mm, Mm> for Assembler<'a> {
273    fn mmx_pmaxsw(&mut self, op0: Mm, op1: Mm) {
274        self.emit(MMX_PMAXSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
275    }
276}
277
278impl<'a> MmxPmaxswEmitter<Mm, Mem> for Assembler<'a> {
279    fn mmx_pmaxsw(&mut self, op0: Mm, op1: Mem) {
280        self.emit(MMX_PMAXSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
281    }
282}
283
284/// `MMX_PMAXUB` (PMAXUB). 
285/// Performs a SIMD compare of the packed unsigned byte, word integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
286///
287///
288/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
289///
290/// Supported operand variants:
291///
292/// ```text
293/// +---+----------+
294/// | # | Operands |
295/// +---+----------+
296/// | 1 | Mm, Mem  |
297/// | 2 | Mm, Mm   |
298/// +---+----------+
299/// ```
300pub trait MmxPmaxubEmitter<A, B> {
301    fn mmx_pmaxub(&mut self, op0: A, op1: B);
302}
303
304impl<'a> MmxPmaxubEmitter<Mm, Mm> for Assembler<'a> {
305    fn mmx_pmaxub(&mut self, op0: Mm, op1: Mm) {
306        self.emit(MMX_PMAXUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
307    }
308}
309
310impl<'a> MmxPmaxubEmitter<Mm, Mem> for Assembler<'a> {
311    fn mmx_pmaxub(&mut self, op0: Mm, op1: Mem) {
312        self.emit(MMX_PMAXUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
313    }
314}
315
316/// `MMX_PMINSW` (PMINSW). 
317/// Performs a SIMD compare of the packed signed byte, word, or dword integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
318///
319///
320/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
321///
322/// Supported operand variants:
323///
324/// ```text
325/// +---+----------+
326/// | # | Operands |
327/// +---+----------+
328/// | 1 | Mm, Mem  |
329/// | 2 | Mm, Mm   |
330/// +---+----------+
331/// ```
332pub trait MmxPminswEmitter<A, B> {
333    fn mmx_pminsw(&mut self, op0: A, op1: B);
334}
335
336impl<'a> MmxPminswEmitter<Mm, Mm> for Assembler<'a> {
337    fn mmx_pminsw(&mut self, op0: Mm, op1: Mm) {
338        self.emit(MMX_PMINSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
339    }
340}
341
342impl<'a> MmxPminswEmitter<Mm, Mem> for Assembler<'a> {
343    fn mmx_pminsw(&mut self, op0: Mm, op1: Mem) {
344        self.emit(MMX_PMINSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
345    }
346}
347
348/// `MMX_PMINUB` (PMINUB). 
349/// Performs a SIMD compare of the packed unsigned byte or word integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
350///
351///
352/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
353///
354/// Supported operand variants:
355///
356/// ```text
357/// +---+----------+
358/// | # | Operands |
359/// +---+----------+
360/// | 1 | Mm, Mem  |
361/// | 2 | Mm, Mm   |
362/// +---+----------+
363/// ```
364pub trait MmxPminubEmitter<A, B> {
365    fn mmx_pminub(&mut self, op0: A, op1: B);
366}
367
368impl<'a> MmxPminubEmitter<Mm, Mm> for Assembler<'a> {
369    fn mmx_pminub(&mut self, op0: Mm, op1: Mm) {
370        self.emit(MMX_PMINUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
371    }
372}
373
374impl<'a> MmxPminubEmitter<Mm, Mem> for Assembler<'a> {
375    fn mmx_pminub(&mut self, op0: Mm, op1: Mem) {
376        self.emit(MMX_PMINUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
377    }
378}
379
380/// `MMX_PMOVMSKB` (PMOVMSKB). 
381/// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
382///
383///
384/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
385///
386/// Supported operand variants:
387///
388/// ```text
389/// +---+----------+
390/// | # | Operands |
391/// +---+----------+
392/// | 1 | Gpq, Mm  |
393/// +---+----------+
394/// ```
395pub trait MmxPmovmskbEmitter<A, B> {
396    fn mmx_pmovmskb(&mut self, op0: A, op1: B);
397}
398
399impl<'a> MmxPmovmskbEmitter<Gpq, Mm> for Assembler<'a> {
400    fn mmx_pmovmskb(&mut self, op0: Gpq, op1: Mm) {
401        self.emit(MMX_PMOVMSKBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
402    }
403}
404
405/// `MMX_PMULHUW` (PMULHUW). 
406/// Performs a SIMD unsigned multiply of the packed unsigned word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each 32-bit intermediate results in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
407///
408///
409/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
410///
411/// Supported operand variants:
412///
413/// ```text
414/// +---+----------+
415/// | # | Operands |
416/// +---+----------+
417/// | 1 | Mm, Mem  |
418/// | 2 | Mm, Mm   |
419/// +---+----------+
420/// ```
421pub trait MmxPmulhuwEmitter<A, B> {
422    fn mmx_pmulhuw(&mut self, op0: A, op1: B);
423}
424
425impl<'a> MmxPmulhuwEmitter<Mm, Mm> for Assembler<'a> {
426    fn mmx_pmulhuw(&mut self, op0: Mm, op1: Mm) {
427        self.emit(MMX_PMULHUWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
428    }
429}
430
431impl<'a> MmxPmulhuwEmitter<Mm, Mem> for Assembler<'a> {
432    fn mmx_pmulhuw(&mut self, op0: Mm, op1: Mem) {
433        self.emit(MMX_PMULHUWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
434    }
435}
436
437/// `MMX_PSADBW` (PSADBW). 
438/// Computes the absolute value of the difference of 8 unsigned byte integers from the source operand (second operand) and from the destination operand (first operand). These 8 differences are then summed to produce an unsigned word integer result that is stored in the destination operand. Figure 4-14 shows the operation of the PSADBW instruction when using 64-bit operands.
439///
440///
441/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
442///
443/// Supported operand variants:
444///
445/// ```text
446/// +---+----------+
447/// | # | Operands |
448/// +---+----------+
449/// | 1 | Mm, Mem  |
450/// | 2 | Mm, Mm   |
451/// +---+----------+
452/// ```
453pub trait MmxPsadbwEmitter<A, B> {
454    fn mmx_psadbw(&mut self, op0: A, op1: B);
455}
456
457impl<'a> MmxPsadbwEmitter<Mm, Mm> for Assembler<'a> {
458    fn mmx_psadbw(&mut self, op0: Mm, op1: Mm) {
459        self.emit(MMX_PSADBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
460    }
461}
462
463impl<'a> MmxPsadbwEmitter<Mm, Mem> for Assembler<'a> {
464    fn mmx_psadbw(&mut self, op0: Mm, op1: Mem) {
465        self.emit(MMX_PSADBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
466    }
467}
468
469/// `MMX_PSHUFW` (PSHUFW). 
470/// Copies words from the source operand (second operand) and inserts them in the destination operand (first operand) at word locations selected with the order operand (third operand). This operation is similar to the operation used by the PSHUFD instruction, which is illustrated in Figure 4-16. For the PSHUFW instruction, each 2-bit field in the order operand selects the contents of one word location in the destination operand. The encodings of the order operand fields select words from the source operand to be copied to the destination operand.
471///
472///
473/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFW.html).
474///
475/// Supported operand variants:
476///
477/// ```text
478/// +---+--------------+
479/// | # | Operands     |
480/// +---+--------------+
481/// | 1 | Mm, Mem, Imm |
482/// | 2 | Mm, Mm, Imm  |
483/// +---+--------------+
484/// ```
485pub trait MmxPshufwEmitter<A, B, C> {
486    fn mmx_pshufw(&mut self, op0: A, op1: B, op2: C);
487}
488
489impl<'a> MmxPshufwEmitter<Mm, Mm, Imm> for Assembler<'a> {
490    fn mmx_pshufw(&mut self, op0: Mm, op1: Mm, op2: Imm) {
491        self.emit(MMX_PSHUFWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
492    }
493}
494
495impl<'a> MmxPshufwEmitter<Mm, Mem, Imm> for Assembler<'a> {
496    fn mmx_pshufw(&mut self, op0: Mm, op1: Mem, op2: Imm) {
497        self.emit(MMX_PSHUFWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
498    }
499}
500
501/// `PREFETCHNTA` (PREFETCHNTA). 
502/// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
503///
504///
505/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
506///
507/// Supported operand variants:
508///
509/// ```text
510/// +---+----------+
511/// | # | Operands |
512/// +---+----------+
513/// | 1 | Mem      |
514/// +---+----------+
515/// ```
516pub trait PrefetchntaEmitter<A> {
517    fn prefetchnta(&mut self, op0: A);
518}
519
520impl<'a> PrefetchntaEmitter<Mem> for Assembler<'a> {
521    fn prefetchnta(&mut self, op0: Mem) {
522        self.emit(PREFETCHNTAM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
523    }
524}
525
526/// `PREFETCHT0` (PREFETCHT0). 
527/// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
528///
529///
530/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
531///
532/// Supported operand variants:
533///
534/// ```text
535/// +---+----------+
536/// | # | Operands |
537/// +---+----------+
538/// | 1 | Mem      |
539/// +---+----------+
540/// ```
541pub trait Prefetcht0Emitter<A> {
542    fn prefetcht0(&mut self, op0: A);
543}
544
545impl<'a> Prefetcht0Emitter<Mem> for Assembler<'a> {
546    fn prefetcht0(&mut self, op0: Mem) {
547        self.emit(PREFETCHT0M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
548    }
549}
550
551/// `PREFETCHT1` (PREFETCHT1). 
552/// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
553///
554///
555/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
556///
557/// Supported operand variants:
558///
559/// ```text
560/// +---+----------+
561/// | # | Operands |
562/// +---+----------+
563/// | 1 | Mem      |
564/// +---+----------+
565/// ```
566pub trait Prefetcht1Emitter<A> {
567    fn prefetcht1(&mut self, op0: A);
568}
569
570impl<'a> Prefetcht1Emitter<Mem> for Assembler<'a> {
571    fn prefetcht1(&mut self, op0: Mem) {
572        self.emit(PREFETCHT1M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
573    }
574}
575
576/// `PREFETCHT2` (PREFETCHT2). 
577/// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
578///
579///
580/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
581///
582/// Supported operand variants:
583///
584/// ```text
585/// +---+----------+
586/// | # | Operands |
587/// +---+----------+
588/// | 1 | Mem      |
589/// +---+----------+
590/// ```
591pub trait Prefetcht2Emitter<A> {
592    fn prefetcht2(&mut self, op0: A);
593}
594
595impl<'a> Prefetcht2Emitter<Mem> for Assembler<'a> {
596    fn prefetcht2(&mut self, op0: Mem) {
597        self.emit(PREFETCHT2M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
598    }
599}
600
601/// `SFENCE` (SFENCE). 
602/// Orders processor execution relative to all memory stores prior to the SFENCE instruction. The processor ensures that every store prior to SFENCE is globally visible before any store after SFENCE becomes globally visible. The SFENCE instruction is ordered with respect to memory stores, other SFENCE instructions, MFENCE instructions, and any serializing instructions (such as the CPUID instruction). It is not ordered with respect to memory loads or the LFENCE instruction.
603///
604///
605/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SFENCE.html).
606///
607/// Supported operand variants:
608///
609/// ```text
610/// +---+----------+
611/// | # | Operands |
612/// +---+----------+
613/// | 1 | (none)   |
614/// +---+----------+
615/// ```
616pub trait SfenceEmitter {
617    fn sfence(&mut self);
618}
619
620impl<'a> SfenceEmitter for Assembler<'a> {
621    fn sfence(&mut self) {
622        self.emit(SFENCE, &NOREG, &NOREG, &NOREG, &NOREG);
623    }
624}
625
626/// `SSE_ADDPS` (ADDPS). 
627/// Adds four, eight or sixteen packed single precision floating-point values from the first source operand with the second source operand, and stores the packed single precision floating-point result in the destination operand.
628///
629///
630/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDPS.html).
631///
632/// Supported operand variants:
633///
634/// ```text
635/// +---+----------+
636/// | # | Operands |
637/// +---+----------+
638/// | 1 | Xmm, Mem |
639/// | 2 | Xmm, Xmm |
640/// +---+----------+
641/// ```
642pub trait SseAddpsEmitter<A, B> {
643    fn sse_addps(&mut self, op0: A, op1: B);
644}
645
646impl<'a> SseAddpsEmitter<Xmm, Xmm> for Assembler<'a> {
647    fn sse_addps(&mut self, op0: Xmm, op1: Xmm) {
648        self.emit(SSE_ADDPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
649    }
650}
651
652impl<'a> SseAddpsEmitter<Xmm, Mem> for Assembler<'a> {
653    fn sse_addps(&mut self, op0: Xmm, op1: Mem) {
654        self.emit(SSE_ADDPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
655    }
656}
657
658/// `SSE_ADDSS` (ADDSS). 
659/// Adds the low single precision floating-point values from the second source operand and the first source operand, and stores the double precision floating-point result in the destination operand.
660///
661///
662/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSS.html).
663///
664/// Supported operand variants:
665///
666/// ```text
667/// +---+----------+
668/// | # | Operands |
669/// +---+----------+
670/// | 1 | Xmm, Mem |
671/// | 2 | Xmm, Xmm |
672/// +---+----------+
673/// ```
674pub trait SseAddssEmitter<A, B> {
675    fn sse_addss(&mut self, op0: A, op1: B);
676}
677
678impl<'a> SseAddssEmitter<Xmm, Xmm> for Assembler<'a> {
679    fn sse_addss(&mut self, op0: Xmm, op1: Xmm) {
680        self.emit(SSE_ADDSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
681    }
682}
683
684impl<'a> SseAddssEmitter<Xmm, Mem> for Assembler<'a> {
685    fn sse_addss(&mut self, op0: Xmm, op1: Mem) {
686        self.emit(SSE_ADDSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
687    }
688}
689
690/// `SSE_ANDNPS` (ANDNPS). 
691/// Performs a bitwise logical AND NOT of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
692///
693///
694/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDNPS.html).
695///
696/// Supported operand variants:
697///
698/// ```text
699/// +---+----------+
700/// | # | Operands |
701/// +---+----------+
702/// | 1 | Xmm, Mem |
703/// | 2 | Xmm, Xmm |
704/// +---+----------+
705/// ```
706pub trait SseAndnpsEmitter<A, B> {
707    fn sse_andnps(&mut self, op0: A, op1: B);
708}
709
710impl<'a> SseAndnpsEmitter<Xmm, Xmm> for Assembler<'a> {
711    fn sse_andnps(&mut self, op0: Xmm, op1: Xmm) {
712        self.emit(SSE_ANDNPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
713    }
714}
715
716impl<'a> SseAndnpsEmitter<Xmm, Mem> for Assembler<'a> {
717    fn sse_andnps(&mut self, op0: Xmm, op1: Mem) {
718        self.emit(SSE_ANDNPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
719    }
720}
721
722/// `SSE_ANDPS` (ANDPS). 
723/// Performs a bitwise logical AND of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
724///
725///
726/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDPS.html).
727///
728/// Supported operand variants:
729///
730/// ```text
731/// +---+----------+
732/// | # | Operands |
733/// +---+----------+
734/// | 1 | Xmm, Mem |
735/// | 2 | Xmm, Xmm |
736/// +---+----------+
737/// ```
738pub trait SseAndpsEmitter<A, B> {
739    fn sse_andps(&mut self, op0: A, op1: B);
740}
741
742impl<'a> SseAndpsEmitter<Xmm, Xmm> for Assembler<'a> {
743    fn sse_andps(&mut self, op0: Xmm, op1: Xmm) {
744        self.emit(SSE_ANDPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
745    }
746}
747
748impl<'a> SseAndpsEmitter<Xmm, Mem> for Assembler<'a> {
749    fn sse_andps(&mut self, op0: Xmm, op1: Mem) {
750        self.emit(SSE_ANDPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
751    }
752}
753
754/// `SSE_CMPPS` (CMPPS). 
755/// Performs a SIMD compare of the packed single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
756///
757///
758/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
759///
760/// Supported operand variants:
761///
762/// ```text
763/// +---+---------------+
764/// | # | Operands      |
765/// +---+---------------+
766/// | 1 | Xmm, Mem, Imm |
767/// | 2 | Xmm, Xmm, Imm |
768/// +---+---------------+
769/// ```
770pub trait SseCmppsEmitter<A, B, C> {
771    fn sse_cmpps(&mut self, op0: A, op1: B, op2: C);
772}
773
774impl<'a> SseCmppsEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
775    fn sse_cmpps(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
776        self.emit(SSE_CMPPSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
777    }
778}
779
780impl<'a> SseCmppsEmitter<Xmm, Mem, Imm> for Assembler<'a> {
781    fn sse_cmpps(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
782        self.emit(SSE_CMPPSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
783    }
784}
785
786/// `SSE_CMPSS` (CMPSS). 
787/// Compares the low single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
788///
789///
790/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
791///
792/// Supported operand variants:
793///
794/// ```text
795/// +---+---------------+
796/// | # | Operands      |
797/// +---+---------------+
798/// | 1 | Xmm, Mem, Imm |
799/// | 2 | Xmm, Xmm, Imm |
800/// +---+---------------+
801/// ```
802pub trait SseCmpssEmitter<A, B, C> {
803    fn sse_cmpss(&mut self, op0: A, op1: B, op2: C);
804}
805
806impl<'a> SseCmpssEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
807    fn sse_cmpss(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
808        self.emit(SSE_CMPSSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
809    }
810}
811
812impl<'a> SseCmpssEmitter<Xmm, Mem, Imm> for Assembler<'a> {
813    fn sse_cmpss(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
814        self.emit(SSE_CMPSSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
815    }
816}
817
818/// `SSE_COMISS` (COMISS). 
819/// Compares the single precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
820///
821///
822/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/COMISS.html).
823///
824/// Supported operand variants:
825///
826/// ```text
827/// +---+----------+
828/// | # | Operands |
829/// +---+----------+
830/// | 1 | Xmm, Mem |
831/// | 2 | Xmm, Xmm |
832/// +---+----------+
833/// ```
834pub trait SseComissEmitter<A, B> {
835    fn sse_comiss(&mut self, op0: A, op1: B);
836}
837
838impl<'a> SseComissEmitter<Xmm, Xmm> for Assembler<'a> {
839    fn sse_comiss(&mut self, op0: Xmm, op1: Xmm) {
840        self.emit(SSE_COMISSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
841    }
842}
843
844impl<'a> SseComissEmitter<Xmm, Mem> for Assembler<'a> {
845    fn sse_comiss(&mut self, op0: Xmm, op1: Mem) {
846        self.emit(SSE_COMISSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
847    }
848}
849
850/// `SSE_CVTSI2SS` (CVTSI2SS). 
851/// Converts a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the “convert-from” source operand to a single precision floating-point value in the destination operand (first operand). The “convert-from” source operand can be a general-purpose register or a memory location. The destination operand is an XMM register. The result is stored in the low doubleword of the destination operand, and the upper three doublewords are left unchanged. When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits.
852///
853///
854/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SS.html).
855///
856/// Supported operand variants:
857///
858/// ```text
859/// +---+----------+
860/// | # | Operands |
861/// +---+----------+
862/// | 1 | Xmm, Gpd |
863/// | 2 | Xmm, Gpq |
864/// | 3 | Xmm, Mem |
865/// +---+----------+
866/// ```
867pub trait SseCvtsi2ssEmitter<A, B> {
868    fn sse_cvtsi2ss(&mut self, op0: A, op1: B);
869}
870
871impl<'a> SseCvtsi2ssEmitter<Xmm, Gpd> for Assembler<'a> {
872    fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Gpd) {
873        self.emit(SSE_CVTSI2SS32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
874    }
875}
876
877impl<'a> SseCvtsi2ssEmitter<Xmm, Mem> for Assembler<'a> {
878    fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Mem) {
879        self.emit(SSE_CVTSI2SS32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
880    }
881}
882
883impl<'a> SseCvtsi2ssEmitter<Xmm, Gpq> for Assembler<'a> {
884    fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Gpq) {
885        self.emit(SSE_CVTSI2SS64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
886    }
887}
888
889/// `SSE_CVTSS2SI` (CVTSS2SI). 
890/// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
891///
892///
893/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SI.html).
894///
895/// Supported operand variants:
896///
897/// ```text
898/// +---+----------+
899/// | # | Operands |
900/// +---+----------+
901/// | 1 | Gpd, Mem |
902/// | 2 | Gpd, Xmm |
903/// | 3 | Gpq, Mem |
904/// | 4 | Gpq, Xmm |
905/// +---+----------+
906/// ```
907pub trait SseCvtss2siEmitter<A, B> {
908    fn sse_cvtss2si(&mut self, op0: A, op1: B);
909}
910
911impl<'a> SseCvtss2siEmitter<Gpd, Xmm> for Assembler<'a> {
912    fn sse_cvtss2si(&mut self, op0: Gpd, op1: Xmm) {
913        self.emit(SSE_CVTSS2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
914    }
915}
916
917impl<'a> SseCvtss2siEmitter<Gpd, Mem> for Assembler<'a> {
918    fn sse_cvtss2si(&mut self, op0: Gpd, op1: Mem) {
919        self.emit(SSE_CVTSS2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
920    }
921}
922
923impl<'a> SseCvtss2siEmitter<Gpq, Xmm> for Assembler<'a> {
924    fn sse_cvtss2si(&mut self, op0: Gpq, op1: Xmm) {
925        self.emit(SSE_CVTSS2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
926    }
927}
928
929impl<'a> SseCvtss2siEmitter<Gpq, Mem> for Assembler<'a> {
930    fn sse_cvtss2si(&mut self, op0: Gpq, op1: Mem) {
931        self.emit(SSE_CVTSS2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
932    }
933}
934
935/// `SSE_CVTTSS2SI` (CVTTSS2SI). 
936/// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
937///
938///
939/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSS2SI.html).
940///
941/// Supported operand variants:
942///
943/// ```text
944/// +---+----------+
945/// | # | Operands |
946/// +---+----------+
947/// | 1 | Gpd, Mem |
948/// | 2 | Gpd, Xmm |
949/// | 3 | Gpq, Mem |
950/// | 4 | Gpq, Xmm |
951/// +---+----------+
952/// ```
953pub trait SseCvttss2siEmitter<A, B> {
954    fn sse_cvttss2si(&mut self, op0: A, op1: B);
955}
956
957impl<'a> SseCvttss2siEmitter<Gpd, Xmm> for Assembler<'a> {
958    fn sse_cvttss2si(&mut self, op0: Gpd, op1: Xmm) {
959        self.emit(SSE_CVTTSS2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
960    }
961}
962
963impl<'a> SseCvttss2siEmitter<Gpd, Mem> for Assembler<'a> {
964    fn sse_cvttss2si(&mut self, op0: Gpd, op1: Mem) {
965        self.emit(SSE_CVTTSS2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
966    }
967}
968
969impl<'a> SseCvttss2siEmitter<Gpq, Xmm> for Assembler<'a> {
970    fn sse_cvttss2si(&mut self, op0: Gpq, op1: Xmm) {
971        self.emit(SSE_CVTTSS2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
972    }
973}
974
975impl<'a> SseCvttss2siEmitter<Gpq, Mem> for Assembler<'a> {
976    fn sse_cvttss2si(&mut self, op0: Gpq, op1: Mem) {
977        self.emit(SSE_CVTTSS2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
978    }
979}
980
981/// `SSE_DIVPS` (DIVPS). 
982/// Performs a SIMD divide of the four, eight or sixteen packed single precision floating-point values in the first source operand (the second operand) by the four, eight or sixteen packed single precision floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
983///
984///
985/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPS.html).
986///
987/// Supported operand variants:
988///
989/// ```text
990/// +---+----------+
991/// | # | Operands |
992/// +---+----------+
993/// | 1 | Xmm, Mem |
994/// | 2 | Xmm, Xmm |
995/// +---+----------+
996/// ```
997pub trait SseDivpsEmitter<A, B> {
998    fn sse_divps(&mut self, op0: A, op1: B);
999}
1000
1001impl<'a> SseDivpsEmitter<Xmm, Xmm> for Assembler<'a> {
1002    fn sse_divps(&mut self, op0: Xmm, op1: Xmm) {
1003        self.emit(SSE_DIVPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1004    }
1005}
1006
1007impl<'a> SseDivpsEmitter<Xmm, Mem> for Assembler<'a> {
1008    fn sse_divps(&mut self, op0: Xmm, op1: Mem) {
1009        self.emit(SSE_DIVPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1010    }
1011}
1012
1013/// `SSE_DIVSS` (DIVSS). 
1014/// Divides the low single precision floating-point value in the first source operand by the low single precision floating-point value in the second source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location.
1015///
1016///
1017/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSS.html).
1018///
1019/// Supported operand variants:
1020///
1021/// ```text
1022/// +---+----------+
1023/// | # | Operands |
1024/// +---+----------+
1025/// | 1 | Xmm, Mem |
1026/// | 2 | Xmm, Xmm |
1027/// +---+----------+
1028/// ```
1029pub trait SseDivssEmitter<A, B> {
1030    fn sse_divss(&mut self, op0: A, op1: B);
1031}
1032
1033impl<'a> SseDivssEmitter<Xmm, Xmm> for Assembler<'a> {
1034    fn sse_divss(&mut self, op0: Xmm, op1: Xmm) {
1035        self.emit(SSE_DIVSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1036    }
1037}
1038
1039impl<'a> SseDivssEmitter<Xmm, Mem> for Assembler<'a> {
1040    fn sse_divss(&mut self, op0: Xmm, op1: Mem) {
1041        self.emit(SSE_DIVSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1042    }
1043}
1044
1045/// `SSE_MAXPS` (MAXPS). 
1046/// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
1047///
1048///
1049/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPS.html).
1050///
1051/// Supported operand variants:
1052///
1053/// ```text
1054/// +---+----------+
1055/// | # | Operands |
1056/// +---+----------+
1057/// | 1 | Xmm, Mem |
1058/// | 2 | Xmm, Xmm |
1059/// +---+----------+
1060/// ```
1061pub trait SseMaxpsEmitter<A, B> {
1062    fn sse_maxps(&mut self, op0: A, op1: B);
1063}
1064
1065impl<'a> SseMaxpsEmitter<Xmm, Xmm> for Assembler<'a> {
1066    fn sse_maxps(&mut self, op0: Xmm, op1: Xmm) {
1067        self.emit(SSE_MAXPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1068    }
1069}
1070
1071impl<'a> SseMaxpsEmitter<Xmm, Mem> for Assembler<'a> {
1072    fn sse_maxps(&mut self, op0: Xmm, op1: Mem) {
1073        self.emit(SSE_MAXPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1074    }
1075}
1076
1077/// `SSE_MAXSS` (MAXSS). 
1078/// Compares the low single precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low doubleword of the destination operand.
1079///
1080///
1081/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSS.html).
1082///
1083/// Supported operand variants:
1084///
1085/// ```text
1086/// +---+----------+
1087/// | # | Operands |
1088/// +---+----------+
1089/// | 1 | Xmm, Mem |
1090/// | 2 | Xmm, Xmm |
1091/// +---+----------+
1092/// ```
1093pub trait SseMaxssEmitter<A, B> {
1094    fn sse_maxss(&mut self, op0: A, op1: B);
1095}
1096
1097impl<'a> SseMaxssEmitter<Xmm, Xmm> for Assembler<'a> {
1098    fn sse_maxss(&mut self, op0: Xmm, op1: Xmm) {
1099        self.emit(SSE_MAXSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1100    }
1101}
1102
1103impl<'a> SseMaxssEmitter<Xmm, Mem> for Assembler<'a> {
1104    fn sse_maxss(&mut self, op0: Xmm, op1: Mem) {
1105        self.emit(SSE_MAXSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1106    }
1107}
1108
1109/// `SSE_MINPS` (MINPS). 
1110/// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
1111///
1112///
1113/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPS.html).
1114///
1115/// Supported operand variants:
1116///
1117/// ```text
1118/// +---+----------+
1119/// | # | Operands |
1120/// +---+----------+
1121/// | 1 | Xmm, Mem |
1122/// | 2 | Xmm, Xmm |
1123/// +---+----------+
1124/// ```
1125pub trait SseMinpsEmitter<A, B> {
1126    fn sse_minps(&mut self, op0: A, op1: B);
1127}
1128
1129impl<'a> SseMinpsEmitter<Xmm, Xmm> for Assembler<'a> {
1130    fn sse_minps(&mut self, op0: Xmm, op1: Xmm) {
1131        self.emit(SSE_MINPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1132    }
1133}
1134
1135impl<'a> SseMinpsEmitter<Xmm, Mem> for Assembler<'a> {
1136    fn sse_minps(&mut self, op0: Xmm, op1: Mem) {
1137        self.emit(SSE_MINPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1138    }
1139}
1140
1141/// `SSE_MINSS` (MINSS). 
1142/// Compares the low single precision floating-point values in the first source operand and the second source operand and returns the minimum value to the low doubleword of the destination operand.
1143///
1144///
1145/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSS.html).
1146///
1147/// Supported operand variants:
1148///
1149/// ```text
1150/// +---+----------+
1151/// | # | Operands |
1152/// +---+----------+
1153/// | 1 | Xmm, Mem |
1154/// | 2 | Xmm, Xmm |
1155/// +---+----------+
1156/// ```
1157pub trait SseMinssEmitter<A, B> {
1158    fn sse_minss(&mut self, op0: A, op1: B);
1159}
1160
1161impl<'a> SseMinssEmitter<Xmm, Xmm> for Assembler<'a> {
1162    fn sse_minss(&mut self, op0: Xmm, op1: Xmm) {
1163        self.emit(SSE_MINSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1164    }
1165}
1166
1167impl<'a> SseMinssEmitter<Xmm, Mem> for Assembler<'a> {
1168    fn sse_minss(&mut self, op0: Xmm, op1: Mem) {
1169        self.emit(SSE_MINSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1170    }
1171}
1172
1173/// `SSE_MOVAPS` (MOVAPS). 
1174/// Moves 4, 8 or 16 single precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from a 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
1175///
1176///
1177/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPS.html).
1178///
1179/// Supported operand variants:
1180///
1181/// ```text
1182/// +---+----------+
1183/// | # | Operands |
1184/// +---+----------+
1185/// | 1 | Mem, Xmm |
1186/// | 2 | Xmm, Mem |
1187/// | 3 | Xmm, Xmm |
1188/// +---+----------+
1189/// ```
1190pub trait SseMovapsEmitter<A, B> {
1191    fn sse_movaps(&mut self, op0: A, op1: B);
1192}
1193
1194impl<'a> SseMovapsEmitter<Xmm, Xmm> for Assembler<'a> {
1195    fn sse_movaps(&mut self, op0: Xmm, op1: Xmm) {
1196        self.emit(SSE_MOVAPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1197    }
1198}
1199
1200impl<'a> SseMovapsEmitter<Xmm, Mem> for Assembler<'a> {
1201    fn sse_movaps(&mut self, op0: Xmm, op1: Mem) {
1202        self.emit(SSE_MOVAPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1203    }
1204}
1205
1206impl<'a> SseMovapsEmitter<Mem, Xmm> for Assembler<'a> {
1207    fn sse_movaps(&mut self, op0: Mem, op1: Xmm) {
1208        self.emit(SSE_MOVAPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1209    }
1210}
1211
1212/// `SSE_MOVHLPS` (MOVHLPS). 
1213/// This instruction cannot be used for memory to register moves.
1214///
1215///
1216/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHLPS.html).
1217///
1218/// Supported operand variants:
1219///
1220/// ```text
1221/// +---+----------+
1222/// | # | Operands |
1223/// +---+----------+
1224/// | 1 | Xmm, Xmm |
1225/// +---+----------+
1226/// ```
1227pub trait SseMovhlpsEmitter<A, B> {
1228    fn sse_movhlps(&mut self, op0: A, op1: B);
1229}
1230
1231impl<'a> SseMovhlpsEmitter<Xmm, Xmm> for Assembler<'a> {
1232    fn sse_movhlps(&mut self, op0: Xmm, op1: Xmm) {
1233        self.emit(SSE_MOVHLPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1234    }
1235}
1236
1237/// `SSE_MOVHPS` (MOVHPS). 
1238/// This instruction cannot be used for register to register or memory to memory moves.
1239///
1240///
1241/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPS.html).
1242///
1243/// Supported operand variants:
1244///
1245/// ```text
1246/// +---+----------+
1247/// | # | Operands |
1248/// +---+----------+
1249/// | 1 | Mem, Xmm |
1250/// | 2 | Xmm, Mem |
1251/// +---+----------+
1252/// ```
1253pub trait SseMovhpsEmitter<A, B> {
1254    fn sse_movhps(&mut self, op0: A, op1: B);
1255}
1256
1257impl<'a> SseMovhpsEmitter<Xmm, Mem> for Assembler<'a> {
1258    fn sse_movhps(&mut self, op0: Xmm, op1: Mem) {
1259        self.emit(SSE_MOVHPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1260    }
1261}
1262
1263impl<'a> SseMovhpsEmitter<Mem, Xmm> for Assembler<'a> {
1264    fn sse_movhps(&mut self, op0: Mem, op1: Xmm) {
1265        self.emit(SSE_MOVHPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1266    }
1267}
1268
1269/// `SSE_MOVLHPS` (MOVLHPS). 
1270/// This instruction cannot be used for memory to register moves.
1271///
1272///
1273/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLHPS.html).
1274///
1275/// Supported operand variants:
1276///
1277/// ```text
1278/// +---+----------+
1279/// | # | Operands |
1280/// +---+----------+
1281/// | 1 | Xmm, Xmm |
1282/// +---+----------+
1283/// ```
1284pub trait SseMovlhpsEmitter<A, B> {
1285    fn sse_movlhps(&mut self, op0: A, op1: B);
1286}
1287
1288impl<'a> SseMovlhpsEmitter<Xmm, Xmm> for Assembler<'a> {
1289    fn sse_movlhps(&mut self, op0: Xmm, op1: Xmm) {
1290        self.emit(SSE_MOVLHPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1291    }
1292}
1293
1294/// `SSE_MOVLPS` (MOVLPS). 
1295/// This instruction cannot be used for register to register or memory to memory moves.
1296///
1297///
1298/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPS.html).
1299///
1300/// Supported operand variants:
1301///
1302/// ```text
1303/// +---+----------+
1304/// | # | Operands |
1305/// +---+----------+
1306/// | 1 | Mem, Xmm |
1307/// | 2 | Xmm, Mem |
1308/// +---+----------+
1309/// ```
1310pub trait SseMovlpsEmitter<A, B> {
1311    fn sse_movlps(&mut self, op0: A, op1: B);
1312}
1313
1314impl<'a> SseMovlpsEmitter<Xmm, Mem> for Assembler<'a> {
1315    fn sse_movlps(&mut self, op0: Xmm, op1: Mem) {
1316        self.emit(SSE_MOVLPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1317    }
1318}
1319
1320impl<'a> SseMovlpsEmitter<Mem, Xmm> for Assembler<'a> {
1321    fn sse_movlps(&mut self, op0: Mem, op1: Xmm) {
1322        self.emit(SSE_MOVLPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1323    }
1324}
1325
1326/// `SSE_MOVMSKPS` (MOVMSKPS). 
1327/// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
1328///
1329///
1330/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
1331///
1332/// Supported operand variants:
1333///
1334/// ```text
1335/// +---+----------+
1336/// | # | Operands |
1337/// +---+----------+
1338/// | 1 | Gpq, Xmm |
1339/// +---+----------+
1340/// ```
1341pub trait SseMovmskpsEmitter<A, B> {
1342    fn sse_movmskps(&mut self, op0: A, op1: B);
1343}
1344
1345impl<'a> SseMovmskpsEmitter<Gpq, Xmm> for Assembler<'a> {
1346    fn sse_movmskps(&mut self, op0: Gpq, op1: Xmm) {
1347        self.emit(SSE_MOVMSKPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1348    }
1349}
1350
1351/// `SSE_MOVNTPS` (MOVNTPS). 
1352/// Moves the packed single precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed single precision floating-point values. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary; otherwise a general-protection exception (#GP) will be generated.
1353///
1354///
1355/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPS.html).
1356///
1357/// Supported operand variants:
1358///
1359/// ```text
1360/// +---+----------+
1361/// | # | Operands |
1362/// +---+----------+
1363/// | 1 | Mem, Xmm |
1364/// +---+----------+
1365/// ```
1366pub trait SseMovntpsEmitter<A, B> {
1367    fn sse_movntps(&mut self, op0: A, op1: B);
1368}
1369
1370impl<'a> SseMovntpsEmitter<Mem, Xmm> for Assembler<'a> {
1371    fn sse_movntps(&mut self, op0: Mem, op1: Xmm) {
1372        self.emit(SSE_MOVNTPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1373    }
1374}
1375
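1376/// `SSE_MOVNTSS` (MOVNTSS): stores the low single precision floating-point value of an XMM register to a 32-bit memory location using a non-temporal hint (AMD SSE4A instruction; it has no entry in the Intel manual).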
1377///
1378/// Supported operand variants:
1379///
1380/// ```text
1381/// +---+----------+
1382/// | # | Operands |
1383/// +---+----------+
1384/// | 1 | Mem, Xmm |
1385/// +---+----------+
1386/// ```
1387pub trait SseMovntssEmitter<A, B> {
1388    fn sse_movntss(&mut self, op0: A, op1: B);
1389}
1390
1391impl<'a> SseMovntssEmitter<Mem, Xmm> for Assembler<'a> {
1392    fn sse_movntss(&mut self, op0: Mem, op1: Xmm) {
1393        self.emit(SSE_MOVNTSSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1394    }
1395}
1396
1397/// `SSE_MOVSS` (MOVSS). 
1398/// Moves a scalar single precision floating-point value from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 32-bit memory locations. This instruction can be used to move a single precision floating-point value to and from the low doubleword of an XMM register and a 32-bit memory location, or to move a single precision floating-point value between the low doublewords of two XMM registers. The instruction cannot be used to transfer data between memory locations.
1399///
1400///
1401/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVSS.html).
1402///
1403/// Supported operand variants:
1404///
1405/// ```text
1406/// +---+----------+
1407/// | # | Operands |
1408/// +---+----------+
1409/// | 1 | Mem, Xmm |
1410/// | 2 | Xmm, Mem |
1411/// | 3 | Xmm, Xmm |
1412/// +---+----------+
1413/// ```
1414pub trait SseMovssEmitter<A, B> {
1415    fn sse_movss(&mut self, op0: A, op1: B);
1416}
1417
1418impl<'a> SseMovssEmitter<Xmm, Xmm> for Assembler<'a> {
1419    fn sse_movss(&mut self, op0: Xmm, op1: Xmm) {
1420        self.emit(SSE_MOVSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1421    }
1422}
1423
1424impl<'a> SseMovssEmitter<Xmm, Mem> for Assembler<'a> {
1425    fn sse_movss(&mut self, op0: Xmm, op1: Mem) {
1426        self.emit(SSE_MOVSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1427    }
1428}
1429
1430impl<'a> SseMovssEmitter<Mem, Xmm> for Assembler<'a> {
1431    fn sse_movss(&mut self, op0: Mem, op1: Xmm) {
1432        self.emit(SSE_MOVSSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1433    }
1434}
1435
1436/// `SSE_MOVUPS` (MOVUPS). 
1437/// Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.
1438///
1439///
1440/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPS.html).
1441///
1442/// Supported operand variants:
1443///
1444/// ```text
1445/// +---+----------+
1446/// | # | Operands |
1447/// +---+----------+
1448/// | 1 | Mem, Xmm |
1449/// | 2 | Xmm, Mem |
1450/// | 3 | Xmm, Xmm |
1451/// +---+----------+
1452/// ```
1453pub trait SseMovupsEmitter<A, B> {
1454    fn sse_movups(&mut self, op0: A, op1: B);
1455}
1456
1457impl<'a> SseMovupsEmitter<Xmm, Xmm> for Assembler<'a> {
1458    fn sse_movups(&mut self, op0: Xmm, op1: Xmm) {
1459        self.emit(SSE_MOVUPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1460    }
1461}
1462
1463impl<'a> SseMovupsEmitter<Xmm, Mem> for Assembler<'a> {
1464    fn sse_movups(&mut self, op0: Xmm, op1: Mem) {
1465        self.emit(SSE_MOVUPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1466    }
1467}
1468
1469impl<'a> SseMovupsEmitter<Mem, Xmm> for Assembler<'a> {
1470    fn sse_movups(&mut self, op0: Mem, op1: Xmm) {
1471        self.emit(SSE_MOVUPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1472    }
1473}
1474
1475/// `SSE_MULPS` (MULPS). 
1476/// Multiplies the packed single precision floating-point values from the first source operand with the corresponding values in the second source operand, and stores the packed single precision floating-point results in the destination operand.
1477///
1478///
1479/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPS.html).
1480///
1481/// Supported operand variants:
1482///
1483/// ```text
1484/// +---+----------+
1485/// | # | Operands |
1486/// +---+----------+
1487/// | 1 | Xmm, Mem |
1488/// | 2 | Xmm, Xmm |
1489/// +---+----------+
1490/// ```
1491pub trait SseMulpsEmitter<A, B> {
1492    fn sse_mulps(&mut self, op0: A, op1: B);
1493}
1494
1495impl<'a> SseMulpsEmitter<Xmm, Xmm> for Assembler<'a> {
1496    fn sse_mulps(&mut self, op0: Xmm, op1: Xmm) {
1497        self.emit(SSE_MULPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1498    }
1499}
1500
1501impl<'a> SseMulpsEmitter<Xmm, Mem> for Assembler<'a> {
1502    fn sse_mulps(&mut self, op0: Xmm, op1: Mem) {
1503        self.emit(SSE_MULPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1504    }
1505}
1506
1507/// `SSE_MULSS` (MULSS). 
1508/// Multiplies the low single precision floating-point value from the second source operand by the low single precision floating-point value in the first source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source operand and the destination operands are XMM registers.
1509///
1510///
1511/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSS.html).
1512///
1513/// Supported operand variants:
1514///
1515/// ```text
1516/// +---+----------+
1517/// | # | Operands |
1518/// +---+----------+
1519/// | 1 | Xmm, Mem |
1520/// | 2 | Xmm, Xmm |
1521/// +---+----------+
1522/// ```
1523pub trait SseMulssEmitter<A, B> {
1524    fn sse_mulss(&mut self, op0: A, op1: B);
1525}
1526
1527impl<'a> SseMulssEmitter<Xmm, Xmm> for Assembler<'a> {
1528    fn sse_mulss(&mut self, op0: Xmm, op1: Xmm) {
1529        self.emit(SSE_MULSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1530    }
1531}
1532
1533impl<'a> SseMulssEmitter<Xmm, Mem> for Assembler<'a> {
1534    fn sse_mulss(&mut self, op0: Xmm, op1: Mem) {
1535        self.emit(SSE_MULSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1536    }
1537}
1538
1539/// `SSE_ORPS` (ORPS). 
1540/// Performs a bitwise logical OR of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
1541///
1542///
1543/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPS.html).
1544///
1545/// Supported operand variants:
1546///
1547/// ```text
1548/// +---+----------+
1549/// | # | Operands |
1550/// +---+----------+
1551/// | 1 | Xmm, Mem |
1552/// | 2 | Xmm, Xmm |
1553/// +---+----------+
1554/// ```
1555pub trait SseOrpsEmitter<A, B> {
1556    fn sse_orps(&mut self, op0: A, op1: B);
1557}
1558
1559impl<'a> SseOrpsEmitter<Xmm, Xmm> for Assembler<'a> {
1560    fn sse_orps(&mut self, op0: Xmm, op1: Xmm) {
1561        self.emit(SSE_ORPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1562    }
1563}
1564
1565impl<'a> SseOrpsEmitter<Xmm, Mem> for Assembler<'a> {
1566    fn sse_orps(&mut self, op0: Xmm, op1: Mem) {
1567        self.emit(SSE_ORPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1568    }
1569}
1570
1571/// `SSE_RCPPS` (RCPPS). 
1572/// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
1573///
1574///
1575/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
1576///
1577/// Supported operand variants:
1578///
1579/// ```text
1580/// +---+----------+
1581/// | # | Operands |
1582/// +---+----------+
1583/// | 1 | Xmm, Mem |
1584/// | 2 | Xmm, Xmm |
1585/// +---+----------+
1586/// ```
1587pub trait SseRcppsEmitter<A, B> {
1588    fn sse_rcpps(&mut self, op0: A, op1: B);
1589}
1590
1591impl<'a> SseRcppsEmitter<Xmm, Xmm> for Assembler<'a> {
1592    fn sse_rcpps(&mut self, op0: Xmm, op1: Xmm) {
1593        self.emit(SSE_RCPPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1594    }
1595}
1596
1597impl<'a> SseRcppsEmitter<Xmm, Mem> for Assembler<'a> {
1598    fn sse_rcpps(&mut self, op0: Xmm, op1: Mem) {
1599        self.emit(SSE_RCPPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1600    }
1601}
1602
1603/// `SSE_RCPSS` (RCPSS). 
1604/// Computes an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
1605///
1606///
1607/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
1608///
1609/// Supported operand variants:
1610///
1611/// ```text
1612/// +---+----------+
1613/// | # | Operands |
1614/// +---+----------+
1615/// | 1 | Xmm, Mem |
1616/// | 2 | Xmm, Xmm |
1617/// +---+----------+
1618/// ```
1619pub trait SseRcpssEmitter<A, B> {
1620    fn sse_rcpss(&mut self, op0: A, op1: B);
1621}
1622
1623impl<'a> SseRcpssEmitter<Xmm, Xmm> for Assembler<'a> {
1624    fn sse_rcpss(&mut self, op0: Xmm, op1: Xmm) {
1625        self.emit(SSE_RCPSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1626    }
1627}
1628
1629impl<'a> SseRcpssEmitter<Xmm, Mem> for Assembler<'a> {
1630    fn sse_rcpss(&mut self, op0: Xmm, op1: Mem) {
1631        self.emit(SSE_RCPSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1632    }
1633}
1634
1635/// `SSE_RSQRTPS` (RSQRTPS). 
1636/// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
1637///
1638///
1639/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
1640///
1641/// Supported operand variants:
1642///
1643/// ```text
1644/// +---+----------+
1645/// | # | Operands |
1646/// +---+----------+
1647/// | 1 | Xmm, Mem |
1648/// | 2 | Xmm, Xmm |
1649/// +---+----------+
1650/// ```
1651pub trait SseRsqrtpsEmitter<A, B> {
1652    fn sse_rsqrtps(&mut self, op0: A, op1: B);
1653}
1654
1655impl<'a> SseRsqrtpsEmitter<Xmm, Xmm> for Assembler<'a> {
1656    fn sse_rsqrtps(&mut self, op0: Xmm, op1: Xmm) {
1657        self.emit(SSE_RSQRTPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1658    }
1659}
1660
1661impl<'a> SseRsqrtpsEmitter<Xmm, Mem> for Assembler<'a> {
1662    fn sse_rsqrtps(&mut self, op0: Xmm, op1: Mem) {
1663        self.emit(SSE_RSQRTPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1664    }
1665}
1666
1667/// `SSE_RSQRTSS` (RSQRTSS). 
1668/// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
1669///
1670///
1671/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
1672///
1673/// Supported operand variants:
1674///
1675/// ```text
1676/// +---+----------+
1677/// | # | Operands |
1678/// +---+----------+
1679/// | 1 | Xmm, Mem |
1680/// | 2 | Xmm, Xmm |
1681/// +---+----------+
1682/// ```
1683pub trait SseRsqrtssEmitter<A, B> {
1684    fn sse_rsqrtss(&mut self, op0: A, op1: B);
1685}
1686
1687impl<'a> SseRsqrtssEmitter<Xmm, Xmm> for Assembler<'a> {
1688    fn sse_rsqrtss(&mut self, op0: Xmm, op1: Xmm) {
1689        self.emit(SSE_RSQRTSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1690    }
1691}
1692
1693impl<'a> SseRsqrtssEmitter<Xmm, Mem> for Assembler<'a> {
1694    fn sse_rsqrtss(&mut self, op0: Xmm, op1: Mem) {
1695        self.emit(SSE_RSQRTSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1696    }
1697}
1698
/// Emitter for `SSE_SHUFPS` (SHUFPS): shuffle of packed single precision values.
///
/// Selects single precision floating-point values from input quadruplets using two-bit
/// controls and moves them to designated elements of the destination operand. Each two-bit
/// field of the imm8 byte, starting from bit 0, is the select control for the corresponding
/// element of a 128-bit lane of the destination. The two lower elements of the lane receive
/// values selected from the quadruplet of the first source operand; the next two elements
/// receive values selected from the quadruplet of the second source operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SHUFPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseShufpsEmitter<Dst, Src, Sel> {
    /// Emits `SHUFPS`; `op0` and `op1` are the register/memory operands and `op2` is the
    /// immediate select control.
    fn sse_shufps(&mut self, op0: Dst, op1: Src, op2: Sel);
}
1718
1719impl<'a> SseShufpsEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
1720    fn sse_shufps(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
1721        self.emit(SSE_SHUFPSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1722    }
1723}
1724
1725impl<'a> SseShufpsEmitter<Xmm, Mem, Imm> for Assembler<'a> {
1726    fn sse_shufps(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
1727        self.emit(SSE_SHUFPSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1728    }
1729}
1730
/// Emitter for `SSE_SQRTPS` (SQRTPS): packed single precision square root.
///
/// Performs a SIMD computation of the square roots of the packed single precision
/// floating-point values in the source operand (second operand) and stores the packed
/// single precision results in the destination operand (first operand). The variants
/// accepted here use XMM registers, i.e. four packed values.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SQRTPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtpsEmitter<Dst, Src> {
    /// Emits `SQRTPS`; `op0` is the destination operand and `op1` the source operand.
    fn sse_sqrtps(&mut self, op0: Dst, op1: Src);
}
1750
1751impl<'a> SseSqrtpsEmitter<Xmm, Xmm> for Assembler<'a> {
1752    fn sse_sqrtps(&mut self, op0: Xmm, op1: Xmm) {
1753        self.emit(SSE_SQRTPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1754    }
1755}
1756
1757impl<'a> SseSqrtpsEmitter<Xmm, Mem> for Assembler<'a> {
1758    fn sse_sqrtps(&mut self, op0: Xmm, op1: Mem) {
1759        self.emit(SSE_SQRTPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1760    }
1761}
1762
/// Emitter for `SSE_SQRTSS` (SQRTSS): scalar single precision square root.
///
/// Computes the square root of the low single precision floating-point value in the
/// second source operand and stores the single precision result in the destination
/// operand. The second source operand can be an XMM register or a 32-bit memory location;
/// the first source and destination operand is an XMM register.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SQRTSS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtssEmitter<Dst, Src> {
    /// Emits `SQRTSS`; `op0` is the destination operand and `op1` the source operand.
    fn sse_sqrtss(&mut self, op0: Dst, op1: Src);
}
1782
1783impl<'a> SseSqrtssEmitter<Xmm, Xmm> for Assembler<'a> {
1784    fn sse_sqrtss(&mut self, op0: Xmm, op1: Xmm) {
1785        self.emit(SSE_SQRTSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1786    }
1787}
1788
1789impl<'a> SseSqrtssEmitter<Xmm, Mem> for Assembler<'a> {
1790    fn sse_sqrtss(&mut self, op0: Xmm, op1: Mem) {
1791        self.emit(SSE_SQRTSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1792    }
1793}
1794
/// Emitter for `SSE_SUBPS` (SUBPS): packed single precision subtraction.
///
/// Performs a SIMD subtract of the packed single precision floating-point values in the
/// second source operand from those in the first source operand, and stores the packed
/// single precision results in the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SUBPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubpsEmitter<Dst, Src> {
    /// Emits `SUBPS` with `op0` as the first operand and `op1` as the second operand.
    fn sse_subps(&mut self, op0: Dst, op1: Src);
}
1814
1815impl<'a> SseSubpsEmitter<Xmm, Xmm> for Assembler<'a> {
1816    fn sse_subps(&mut self, op0: Xmm, op1: Xmm) {
1817        self.emit(SSE_SUBPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1818    }
1819}
1820
1821impl<'a> SseSubpsEmitter<Xmm, Mem> for Assembler<'a> {
1822    fn sse_subps(&mut self, op0: Xmm, op1: Mem) {
1823        self.emit(SSE_SUBPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1824    }
1825}
1826
/// Emitter for `SSE_SUBSS` (SUBSS): scalar single precision subtraction.
///
/// Subtracts the low single precision floating-point value in the second source operand
/// from the low single precision floating-point value in the first source operand and
/// stores the single precision result in the low doubleword of the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SUBSS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubssEmitter<Dst, Src> {
    /// Emits `SUBSS` with `op0` as the first operand and `op1` as the second operand.
    fn sse_subss(&mut self, op0: Dst, op1: Src);
}
1846
1847impl<'a> SseSubssEmitter<Xmm, Xmm> for Assembler<'a> {
1848    fn sse_subss(&mut self, op0: Xmm, op1: Xmm) {
1849        self.emit(SSE_SUBSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1850    }
1851}
1852
1853impl<'a> SseSubssEmitter<Xmm, Mem> for Assembler<'a> {
1854    fn sse_subss(&mut self, op0: Xmm, op1: Mem) {
1855        self.emit(SSE_SUBSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1856    }
1857}
1858
/// Emitter for `SSE_UCOMISS` (UCOMISS): unordered scalar single precision compare.
///
/// Compares the single precision floating-point values in the low doublewords of
/// operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF
/// flags in the EFLAGS register according to the result (unordered, greater than, less
/// than, or equal). The OF, SF, and AF flags are set to 0. The unordered result is returned
/// if either source operand is a NaN (QNaN or SNaN).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UCOMISS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUcomissEmitter<Lhs, Rhs> {
    /// Emits `UCOMISS` comparing `op0` (operand 1) with `op1` (operand 2).
    fn sse_ucomiss(&mut self, op0: Lhs, op1: Rhs);
}
1878
1879impl<'a> SseUcomissEmitter<Xmm, Xmm> for Assembler<'a> {
1880    fn sse_ucomiss(&mut self, op0: Xmm, op1: Xmm) {
1881        self.emit(SSE_UCOMISSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1882    }
1883}
1884
1885impl<'a> SseUcomissEmitter<Xmm, Mem> for Assembler<'a> {
1886    fn sse_ucomiss(&mut self, op0: Xmm, op1: Mem) {
1887        self.emit(SSE_UCOMISSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1888    }
1889}
1890
/// Emitter for `SSE_UNPCKHPS` (UNPCKHPS): unpack and interleave high packed singles.
///
/// Performs an interleaved unpack of the high single precision floating-point values from
/// the first source operand and the second source operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpckhpsEmitter<Dst, Src> {
    /// Emits `UNPCKHPS` with `op0` as the first operand and `op1` as the second operand.
    fn sse_unpckhps(&mut self, op0: Dst, op1: Src);
}
1910
1911impl<'a> SseUnpckhpsEmitter<Xmm, Xmm> for Assembler<'a> {
1912    fn sse_unpckhps(&mut self, op0: Xmm, op1: Xmm) {
1913        self.emit(SSE_UNPCKHPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1914    }
1915}
1916
1917impl<'a> SseUnpckhpsEmitter<Xmm, Mem> for Assembler<'a> {
1918    fn sse_unpckhps(&mut self, op0: Xmm, op1: Mem) {
1919        self.emit(SSE_UNPCKHPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1920    }
1921}
1922
/// Emitter for `SSE_UNPCKLPS` (UNPCKLPS): unpack and interleave low packed singles.
///
/// Performs an interleaved unpack of the low single precision floating-point values from
/// the first source operand and the second source operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpcklpsEmitter<Dst, Src> {
    /// Emits `UNPCKLPS` with `op0` as the first operand and `op1` as the second operand.
    fn sse_unpcklps(&mut self, op0: Dst, op1: Src);
}
1942
1943impl<'a> SseUnpcklpsEmitter<Xmm, Xmm> for Assembler<'a> {
1944    fn sse_unpcklps(&mut self, op0: Xmm, op1: Xmm) {
1945        self.emit(SSE_UNPCKLPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1946    }
1947}
1948
1949impl<'a> SseUnpcklpsEmitter<Xmm, Mem> for Assembler<'a> {
1950    fn sse_unpcklps(&mut self, op0: Xmm, op1: Mem) {
1951        self.emit(SSE_UNPCKLPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1952    }
1953}
1954
/// Emitter for `SSE_XORPS` (XORPS): bitwise XOR of packed single precision values.
///
/// Performs a bitwise logical XOR of the packed single precision floating-point values
/// from the first source operand and the second source operand, and stores the result in
/// the destination operand. The variants accepted here use XMM registers, i.e. four packed
/// values.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/XORPS.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseXorpsEmitter<Dst, Src> {
    /// Emits `XORPS` with `op0` as the first operand and `op1` as the second operand.
    fn sse_xorps(&mut self, op0: Dst, op1: Src);
}
1974
1975impl<'a> SseXorpsEmitter<Xmm, Xmm> for Assembler<'a> {
1976    fn sse_xorps(&mut self, op0: Xmm, op1: Xmm) {
1977        self.emit(SSE_XORPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1978    }
1979}
1980
1981impl<'a> SseXorpsEmitter<Xmm, Mem> for Assembler<'a> {
1982    fn sse_xorps(&mut self, op0: Xmm, op1: Mem) {
1983        self.emit(SSE_XORPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1984    }
1985}
1986
/// Emitter for `STMXCSR` (STMXCSR): store the MXCSR register to memory.
///
/// Stores the contents of the MXCSR control and status register to the destination
/// operand, which is a 32-bit memory location. The reserved bits in the MXCSR register are
/// stored as 0s.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
///
/// # Operand variants
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait StmxcsrEmitter<Dst> {
    /// Emits `STMXCSR` with `op0` as the destination operand.
    fn stmxcsr(&mut self, op0: Dst);
}
2005
2006impl<'a> StmxcsrEmitter<Mem> for Assembler<'a> {
2007    fn stmxcsr(&mut self, op0: Mem) {
2008        self.emit(STMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
2009    }
2010}
2011
2012
2013impl<'a> Assembler<'a> {
2014    /// `LDMXCSR` (LDMXCSR). 
2015    /// Loads the source operand into the MXCSR control/status register. The source operand is a 32-bit memory location. See “MXCSR Control and Status Register” in Chapter 10, of the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for a description of the MXCSR register and its contents.
2016    ///
2017    ///
2018    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
2019    ///
2020    /// Supported operand variants:
2021    ///
2022    /// ```text
2023    /// +---+----------+
2024    /// | # | Operands |
2025    /// +---+----------+
2026    /// | 1 | Mem      |
2027    /// +---+----------+
2028    /// ```
2029    #[inline]
2030    pub fn ldmxcsr<A>(&mut self, op0: A)
2031    where Assembler<'a>: LdmxcsrEmitter<A> {
2032        <Self as LdmxcsrEmitter<A>>::ldmxcsr(self, op0);
2033    }
2034    /// `MMX_MASKMOVQ` (MASKMOVQ). 
2035    /// Stores selected bytes from the source operand (first operand) into a 64-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are MMX technology registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
2036    ///
2037    ///
2038    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVQ.html).
2039    ///
2040    /// Supported operand variants:
2041    ///
2042    /// ```text
2043    /// +---+----------+
2044    /// | # | Operands |
2045    /// +---+----------+
2046    /// | 1 | Mm, Mm   |
2047    /// +---+----------+
2048    /// ```
2049    #[inline]
2050    pub fn mmx_maskmovq<A, B>(&mut self, op0: A, op1: B)
2051    where Assembler<'a>: MmxMaskmovqEmitter<A, B> {
2052        <Self as MmxMaskmovqEmitter<A, B>>::mmx_maskmovq(self, op0, op1);
2053    }
2054    /// `MMX_MOVDQ2Q`.
2055    ///
2056    /// Supported operand variants:
2057    ///
2058    /// ```text
2059    /// +---+----------+
2060    /// | # | Operands |
2061    /// +---+----------+
2062    /// | 1 | Mm, Xmm  |
2063    /// +---+----------+
2064    /// ```
2065    #[inline]
2066    pub fn mmx_movdq2q<A, B>(&mut self, op0: A, op1: B)
2067    where Assembler<'a>: MmxMovdq2qEmitter<A, B> {
2068        <Self as MmxMovdq2qEmitter<A, B>>::mmx_movdq2q(self, op0, op1);
2069    }
2070    /// `MMX_MOVNTQ` (MOVNTQ). 
2071    /// Moves the quadword in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to minimize cache pollution during the write to memory. The source operand is an MMX technology register, which is assumed to contain packed integer data (packed bytes, words, or doublewords). The destination operand is a 64-bit memory location.
2072    ///
2073    ///
2074    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTQ.html).
2075    ///
2076    /// Supported operand variants:
2077    ///
2078    /// ```text
2079    /// +---+----------+
2080    /// | # | Operands |
2081    /// +---+----------+
2082    /// | 1 | Mem, Mm  |
2083    /// +---+----------+
2084    /// ```
2085    #[inline]
2086    pub fn mmx_movntq<A, B>(&mut self, op0: A, op1: B)
2087    where Assembler<'a>: MmxMovntqEmitter<A, B> {
2088        <Self as MmxMovntqEmitter<A, B>>::mmx_movntq(self, op0, op1);
2089    }
2090    /// `MMX_MOVQ2DQ` (MOVQ2DQ). 
2091    /// Moves the quadword from the source operand (second operand) to the low quadword of the destination operand (first operand). The source operand is an MMX technology register and the destination operand is an XMM register.
2092    ///
2093    ///
2094    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVQ2DQ.html).
2095    ///
2096    /// Supported operand variants:
2097    ///
2098    /// ```text
2099    /// +---+----------+
2100    /// | # | Operands |
2101    /// +---+----------+
2102    /// | 1 | Xmm, Mm  |
2103    /// +---+----------+
2104    /// ```
2105    #[inline]
2106    pub fn mmx_movq2dq<A, B>(&mut self, op0: A, op1: B)
2107    where Assembler<'a>: MmxMovq2dqEmitter<A, B> {
2108        <Self as MmxMovq2dqEmitter<A, B>>::mmx_movq2dq(self, op0, op1);
2109    }
2110    /// `MMX_PAVGB` (PAVGB). 
2111    /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2112    ///
2113    ///
2114    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2115    ///
2116    /// Supported operand variants:
2117    ///
2118    /// ```text
2119    /// +---+----------+
2120    /// | # | Operands |
2121    /// +---+----------+
2122    /// | 1 | Mm, Mem  |
2123    /// | 2 | Mm, Mm   |
2124    /// +---+----------+
2125    /// ```
2126    #[inline]
2127    pub fn mmx_pavgb<A, B>(&mut self, op0: A, op1: B)
2128    where Assembler<'a>: MmxPavgbEmitter<A, B> {
2129        <Self as MmxPavgbEmitter<A, B>>::mmx_pavgb(self, op0, op1);
2130    }
2131    /// `MMX_PAVGW` (PAVGW). 
2132    /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2133    ///
2134    ///
2135    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2136    ///
2137    /// Supported operand variants:
2138    ///
2139    /// ```text
2140    /// +---+----------+
2141    /// | # | Operands |
2142    /// +---+----------+
2143    /// | 1 | Mm, Mem  |
2144    /// | 2 | Mm, Mm   |
2145    /// +---+----------+
2146    /// ```
2147    #[inline]
2148    pub fn mmx_pavgw<A, B>(&mut self, op0: A, op1: B)
2149    where Assembler<'a>: MmxPavgwEmitter<A, B> {
2150        <Self as MmxPavgwEmitter<A, B>>::mmx_pavgw(self, op0, op1);
2151    }
2152    /// `MMX_PEXTRW` (PEXTRW). 
2153    /// Copies the word in the source operand (second operand) specified by the count operand (third operand) to the destination operand (first operand). The source operand can be an MMX technology register or an XMM register. The destination operand can be the low word of a general-purpose register or a 16-bit memory address. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location. The content of the destination register above bit 16 is cleared (set to all 0s).
2154    ///
2155    ///
2156    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
2157    ///
2158    /// Supported operand variants:
2159    ///
2160    /// ```text
2161    /// +---+--------------+
2162    /// | # | Operands     |
2163    /// +---+--------------+
2164    /// | 1 | Gpq, Mm, Imm |
2165    /// +---+--------------+
2166    /// ```
2167    #[inline]
2168    pub fn mmx_pextrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2169    where Assembler<'a>: MmxPextrwEmitter<A, B, C> {
2170        <Self as MmxPextrwEmitter<A, B, C>>::mmx_pextrw(self, op0, op1, op2);
2171    }
2172    /// `MMX_PINSRW` (PINSRW). 
2173    /// Three operand MMX and SSE instructions
2174    ///
2175    ///
2176    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
2177    ///
2178    /// Supported operand variants:
2179    ///
2180    /// ```text
2181    /// +---+--------------+
2182    /// | # | Operands     |
2183    /// +---+--------------+
2184    /// | 1 | Mm, Gpd, Imm |
2185    /// | 2 | Mm, Mem, Imm |
2186    /// +---+--------------+
2187    /// ```
2188    #[inline]
2189    pub fn mmx_pinsrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2190    where Assembler<'a>: MmxPinsrwEmitter<A, B, C> {
2191        <Self as MmxPinsrwEmitter<A, B, C>>::mmx_pinsrw(self, op0, op1, op2);
2192    }
2193    /// `MMX_PMAXSW` (PMAXSW). 
2194    /// Performs a SIMD compare of the packed signed byte, word, dword or qword integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2195    ///
2196    ///
2197    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
2198    ///
2199    /// Supported operand variants:
2200    ///
2201    /// ```text
2202    /// +---+----------+
2203    /// | # | Operands |
2204    /// +---+----------+
2205    /// | 1 | Mm, Mem  |
2206    /// | 2 | Mm, Mm   |
2207    /// +---+----------+
2208    /// ```
2209    #[inline]
2210    pub fn mmx_pmaxsw<A, B>(&mut self, op0: A, op1: B)
2211    where Assembler<'a>: MmxPmaxswEmitter<A, B> {
2212        <Self as MmxPmaxswEmitter<A, B>>::mmx_pmaxsw(self, op0, op1);
2213    }
2214    /// `MMX_PMAXUB` (PMAXUB). 
2215    /// Performs a SIMD compare of the packed unsigned byte, word integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2216    ///
2217    ///
2218    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
2219    ///
2220    /// Supported operand variants:
2221    ///
2222    /// ```text
2223    /// +---+----------+
2224    /// | # | Operands |
2225    /// +---+----------+
2226    /// | 1 | Mm, Mem  |
2227    /// | 2 | Mm, Mm   |
2228    /// +---+----------+
2229    /// ```
2230    #[inline]
2231    pub fn mmx_pmaxub<A, B>(&mut self, op0: A, op1: B)
2232    where Assembler<'a>: MmxPmaxubEmitter<A, B> {
2233        <Self as MmxPmaxubEmitter<A, B>>::mmx_pmaxub(self, op0, op1);
2234    }
2235    /// `MMX_PMINSW` (PMINSW). 
2236    /// Performs a SIMD compare of the packed signed byte, word, or dword integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2237    ///
2238    ///
2239    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
2240    ///
2241    /// Supported operand variants:
2242    ///
2243    /// ```text
2244    /// +---+----------+
2245    /// | # | Operands |
2246    /// +---+----------+
2247    /// | 1 | Mm, Mem  |
2248    /// | 2 | Mm, Mm   |
2249    /// +---+----------+
2250    /// ```
2251    #[inline]
2252    pub fn mmx_pminsw<A, B>(&mut self, op0: A, op1: B)
2253    where Assembler<'a>: MmxPminswEmitter<A, B> {
2254        <Self as MmxPminswEmitter<A, B>>::mmx_pminsw(self, op0, op1);
2255    }
2256    /// `MMX_PMINUB` (PMINUB). 
2257    /// Performs a SIMD compare of the packed unsigned byte or word integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2258    ///
2259    ///
2260    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
2261    ///
2262    /// Supported operand variants:
2263    ///
2264    /// ```text
2265    /// +---+----------+
2266    /// | # | Operands |
2267    /// +---+----------+
2268    /// | 1 | Mm, Mem  |
2269    /// | 2 | Mm, Mm   |
2270    /// +---+----------+
2271    /// ```
2272    #[inline]
2273    pub fn mmx_pminub<A, B>(&mut self, op0: A, op1: B)
2274    where Assembler<'a>: MmxPminubEmitter<A, B> {
2275        <Self as MmxPminubEmitter<A, B>>::mmx_pminub(self, op0, op1);
2276    }
2277    /// `MMX_PMOVMSKB` (PMOVMSKB). 
2278    /// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
2279    ///
2280    ///
2281    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
2282    ///
2283    /// Supported operand variants:
2284    ///
2285    /// ```text
2286    /// +---+----------+
2287    /// | # | Operands |
2288    /// +---+----------+
2289    /// | 1 | Gpq, Mm  |
2290    /// +---+----------+
2291    /// ```
2292    #[inline]
2293    pub fn mmx_pmovmskb<A, B>(&mut self, op0: A, op1: B)
2294    where Assembler<'a>: MmxPmovmskbEmitter<A, B> {
2295        <Self as MmxPmovmskbEmitter<A, B>>::mmx_pmovmskb(self, op0, op1);
2296    }
2297    /// `MMX_PMULHUW` (PMULHUW). 
2298    /// Performs a SIMD unsigned multiply of the packed unsigned word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each 32-bit intermediate results in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
2299    ///
2300    ///
2301    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
2302    ///
2303    /// Supported operand variants:
2304    ///
2305    /// ```text
2306    /// +---+----------+
2307    /// | # | Operands |
2308    /// +---+----------+
2309    /// | 1 | Mm, Mem  |
2310    /// | 2 | Mm, Mm   |
2311    /// +---+----------+
2312    /// ```
2313    #[inline]
2314    pub fn mmx_pmulhuw<A, B>(&mut self, op0: A, op1: B)
2315    where Assembler<'a>: MmxPmulhuwEmitter<A, B> {
2316        <Self as MmxPmulhuwEmitter<A, B>>::mmx_pmulhuw(self, op0, op1);
2317    }
2318    /// `MMX_PSADBW` (PSADBW). 
2319    /// Computes the absolute value of the difference of 8 unsigned byte integers from the source operand (second operand) and from the destination operand (first operand). These 8 differences are then summed to produce an unsigned word integer result that is stored in the destination operand. Figure 4-14 shows the operation of the PSADBW instruction when using 64-bit operands.
2320    ///
2321    ///
2322    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
2323    ///
2324    /// Supported operand variants:
2325    ///
2326    /// ```text
2327    /// +---+----------+
2328    /// | # | Operands |
2329    /// +---+----------+
2330    /// | 1 | Mm, Mem  |
2331    /// | 2 | Mm, Mm   |
2332    /// +---+----------+
2333    /// ```
2334    #[inline]
2335    pub fn mmx_psadbw<A, B>(&mut self, op0: A, op1: B)
2336    where Assembler<'a>: MmxPsadbwEmitter<A, B> {
2337        <Self as MmxPsadbwEmitter<A, B>>::mmx_psadbw(self, op0, op1);
2338    }
2339    /// `MMX_PSHUFW` (PSHUFW). 
2340    /// Copies words from the source operand (second operand) and inserts them in the destination operand (first operand) at word locations selected with the order operand (third operand). This operation is similar to the operation used by the PSHUFD instruction, which is illustrated in Figure 4-16. For the PSHUFW instruction, each 2-bit field in the order operand selects the contents of one word location in the destination operand. The encodings of the order operand fields select words from the source operand to be copied to the destination operand.
2341    ///
2342    ///
2343    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFW.html).
2344    ///
2345    /// Supported operand variants:
2346    ///
2347    /// ```text
2348    /// +---+--------------+
2349    /// | # | Operands     |
2350    /// +---+--------------+
2351    /// | 1 | Mm, Mem, Imm |
2352    /// | 2 | Mm, Mm, Imm  |
2353    /// +---+--------------+
2354    /// ```
2355    #[inline]
2356    pub fn mmx_pshufw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2357    where Assembler<'a>: MmxPshufwEmitter<A, B, C> {
2358        <Self as MmxPshufwEmitter<A, B, C>>::mmx_pshufw(self, op0, op1, op2);
2359    }
2360    /// `PREFETCHNTA` (PREFETCHNTA). 
2361    /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2362    ///
2363    ///
2364    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2365    ///
2366    /// Supported operand variants:
2367    ///
2368    /// ```text
2369    /// +---+----------+
2370    /// | # | Operands |
2371    /// +---+----------+
2372    /// | 1 | Mem      |
2373    /// +---+----------+
2374    /// ```
2375    #[inline]
2376    pub fn prefetchnta<A>(&mut self, op0: A)
2377    where Assembler<'a>: PrefetchntaEmitter<A> {
2378        <Self as PrefetchntaEmitter<A>>::prefetchnta(self, op0);
2379    }
2380    /// `PREFETCHT0` (PREFETCHT0). 
2381    /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2382    ///
2383    ///
2384    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2385    ///
2386    /// Supported operand variants:
2387    ///
2388    /// ```text
2389    /// +---+----------+
2390    /// | # | Operands |
2391    /// +---+----------+
2392    /// | 1 | Mem      |
2393    /// +---+----------+
2394    /// ```
2395    #[inline]
2396    pub fn prefetcht0<A>(&mut self, op0: A)
2397    where Assembler<'a>: Prefetcht0Emitter<A> {
2398        <Self as Prefetcht0Emitter<A>>::prefetcht0(self, op0);
2399    }
2400    /// `PREFETCHT1` (PREFETCHT1). 
2401    /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2402    ///
2403    ///
2404    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2405    ///
2406    /// Supported operand variants:
2407    ///
2408    /// ```text
2409    /// +---+----------+
2410    /// | # | Operands |
2411    /// +---+----------+
2412    /// | 1 | Mem      |
2413    /// +---+----------+
2414    /// ```
2415    #[inline]
2416    pub fn prefetcht1<A>(&mut self, op0: A)
2417    where Assembler<'a>: Prefetcht1Emitter<A> {
2418        <Self as Prefetcht1Emitter<A>>::prefetcht1(self, op0);
2419    }
2420    /// `PREFETCHT2` (PREFETCHT2). 
2421    /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2422    ///
2423    ///
2424    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2425    ///
2426    /// Supported operand variants:
2427    ///
2428    /// ```text
2429    /// +---+----------+
2430    /// | # | Operands |
2431    /// +---+----------+
2432    /// | 1 | Mem      |
2433    /// +---+----------+
2434    /// ```
2435    #[inline]
2436    pub fn prefetcht2<A>(&mut self, op0: A)
2437    where Assembler<'a>: Prefetcht2Emitter<A> {
2438        <Self as Prefetcht2Emitter<A>>::prefetcht2(self, op0);
2439    }
2440    /// `SFENCE` (SFENCE). 
2441    /// Orders processor execution relative to all memory stores prior to the SFENCE instruction. The processor ensures that every store prior to SFENCE is globally visible before any store after SFENCE becomes globally visible. The SFENCE instruction is ordered with respect to memory stores, other SFENCE instructions, MFENCE instructions, and any serializing instructions (such as the CPUID instruction). It is not ordered with respect to memory loads or the LFENCE instruction.
2442    ///
2443    ///
2444    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SFENCE.html).
2445    ///
2446    /// Supported operand variants:
2447    ///
2448    /// ```text
2449    /// +---+----------+
2450    /// | # | Operands |
2451    /// +---+----------+
2452    /// | 1 | (none)   |
2453    /// +---+----------+
2454    /// ```
2455    #[inline]
2456    pub fn sfence(&mut self)
2457    where Assembler<'a>: SfenceEmitter {
2458        <Self as SfenceEmitter>::sfence(self);
2459    }
2460    /// `SSE_ADDPS` (ADDPS). 
2461    /// Adds four, eight or sixteen packed single precision floating-point values from the first source operand with the second source operand, and stores the packed single precision floating-point result in the destination operand.
2462    ///
2463    ///
2464    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDPS.html).
2465    ///
2466    /// Supported operand variants:
2467    ///
2468    /// ```text
2469    /// +---+----------+
2470    /// | # | Operands |
2471    /// +---+----------+
2472    /// | 1 | Xmm, Mem |
2473    /// | 2 | Xmm, Xmm |
2474    /// +---+----------+
2475    /// ```
2476    #[inline]
2477    pub fn sse_addps<A, B>(&mut self, op0: A, op1: B)
2478    where Assembler<'a>: SseAddpsEmitter<A, B> {
2479        <Self as SseAddpsEmitter<A, B>>::sse_addps(self, op0, op1);
2480    }
2481    /// `SSE_ADDSS` (ADDSS). 
2482    /// Adds the low single precision floating-point values from the second source operand and the first source operand, and stores the double precision floating-point result in the destination operand.
2483    ///
2484    ///
2485    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSS.html).
2486    ///
2487    /// Supported operand variants:
2488    ///
2489    /// ```text
2490    /// +---+----------+
2491    /// | # | Operands |
2492    /// +---+----------+
2493    /// | 1 | Xmm, Mem |
2494    /// | 2 | Xmm, Xmm |
2495    /// +---+----------+
2496    /// ```
2497    #[inline]
2498    pub fn sse_addss<A, B>(&mut self, op0: A, op1: B)
2499    where Assembler<'a>: SseAddssEmitter<A, B> {
2500        <Self as SseAddssEmitter<A, B>>::sse_addss(self, op0, op1);
2501    }
2502    /// `SSE_ANDNPS` (ANDNPS). 
2503    /// Performs a bitwise logical AND NOT of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
2504    ///
2505    ///
2506    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDNPS.html).
2507    ///
2508    /// Supported operand variants:
2509    ///
2510    /// ```text
2511    /// +---+----------+
2512    /// | # | Operands |
2513    /// +---+----------+
2514    /// | 1 | Xmm, Mem |
2515    /// | 2 | Xmm, Xmm |
2516    /// +---+----------+
2517    /// ```
2518    #[inline]
2519    pub fn sse_andnps<A, B>(&mut self, op0: A, op1: B)
2520    where Assembler<'a>: SseAndnpsEmitter<A, B> {
2521        <Self as SseAndnpsEmitter<A, B>>::sse_andnps(self, op0, op1);
2522    }
2523    /// `SSE_ANDPS` (ANDPS). 
2524    /// Performs a bitwise logical AND of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
2525    ///
2526    ///
2527    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDPS.html).
2528    ///
2529    /// Supported operand variants:
2530    ///
2531    /// ```text
2532    /// +---+----------+
2533    /// | # | Operands |
2534    /// +---+----------+
2535    /// | 1 | Xmm, Mem |
2536    /// | 2 | Xmm, Xmm |
2537    /// +---+----------+
2538    /// ```
2539    #[inline]
2540    pub fn sse_andps<A, B>(&mut self, op0: A, op1: B)
2541    where Assembler<'a>: SseAndpsEmitter<A, B> {
2542        <Self as SseAndpsEmitter<A, B>>::sse_andps(self, op0, op1);
2543    }
2544    /// `SSE_CMPPS` (CMPPS). 
2545    /// Performs a SIMD compare of the packed single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
2546    ///
2547    ///
2548    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
2549    ///
2550    /// Supported operand variants:
2551    ///
2552    /// ```text
2553    /// +---+---------------+
2554    /// | # | Operands      |
2555    /// +---+---------------+
2556    /// | 1 | Xmm, Mem, Imm |
2557    /// | 2 | Xmm, Xmm, Imm |
2558    /// +---+---------------+
2559    /// ```
2560    #[inline]
2561    pub fn sse_cmpps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2562    where Assembler<'a>: SseCmppsEmitter<A, B, C> {
2563        <Self as SseCmppsEmitter<A, B, C>>::sse_cmpps(self, op0, op1, op2);
2564    }
2565    /// `SSE_CMPSS` (CMPSS). 
2566    /// Compares the low single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
2567    ///
2568    ///
2569    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
2570    ///
2571    /// Supported operand variants:
2572    ///
2573    /// ```text
2574    /// +---+---------------+
2575    /// | # | Operands      |
2576    /// +---+---------------+
2577    /// | 1 | Xmm, Mem, Imm |
2578    /// | 2 | Xmm, Xmm, Imm |
2579    /// +---+---------------+
2580    /// ```
2581    #[inline]
2582    pub fn sse_cmpss<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2583    where Assembler<'a>: SseCmpssEmitter<A, B, C> {
2584        <Self as SseCmpssEmitter<A, B, C>>::sse_cmpss(self, op0, op1, op2);
2585    }
2586    /// `SSE_COMISS` (COMISS). 
2587    /// Compares the single precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
2588    ///
2589    ///
2590    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/COMISS.html).
2591    ///
2592    /// Supported operand variants:
2593    ///
2594    /// ```text
2595    /// +---+----------+
2596    /// | # | Operands |
2597    /// +---+----------+
2598    /// | 1 | Xmm, Mem |
2599    /// | 2 | Xmm, Xmm |
2600    /// +---+----------+
2601    /// ```
2602    #[inline]
2603    pub fn sse_comiss<A, B>(&mut self, op0: A, op1: B)
2604    where Assembler<'a>: SseComissEmitter<A, B> {
2605        <Self as SseComissEmitter<A, B>>::sse_comiss(self, op0, op1);
2606    }
2607    /// `SSE_CVTSI2SS` (CVTSI2SS). 
2608    /// Converts a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the “convert-from” source operand to a single precision floating-point value in the destination operand (first operand). The “convert-from” source operand can be a general-purpose register or a memory location. The destination operand is an XMM register. The result is stored in the low doubleword of the destination operand, and the upper three doublewords are left unchanged. When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits.
2609    ///
2610    ///
2611    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SS.html).
2612    ///
2613    /// Supported operand variants:
2614    ///
2615    /// ```text
2616    /// +---+----------+
2617    /// | # | Operands |
2618    /// +---+----------+
2619    /// | 1 | Xmm, Gpd |
2620    /// | 2 | Xmm, Gpq |
2621    /// | 3 | Xmm, Mem |
2622    /// +---+----------+
2623    /// ```
2624    #[inline]
2625    pub fn sse_cvtsi2ss<A, B>(&mut self, op0: A, op1: B)
2626    where Assembler<'a>: SseCvtsi2ssEmitter<A, B> {
2627        <Self as SseCvtsi2ssEmitter<A, B>>::sse_cvtsi2ss(self, op0, op1);
2628    }
2629    /// `SSE_CVTSS2SI` (CVTSS2SI). 
2630    /// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
2631    ///
2632    ///
2633    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SI.html).
2634    ///
2635    /// Supported operand variants:
2636    ///
2637    /// ```text
2638    /// +---+----------+
2639    /// | # | Operands |
2640    /// +---+----------+
2641    /// | 1 | Gpd, Mem |
2642    /// | 2 | Gpd, Xmm |
2643    /// | 3 | Gpq, Mem |
2644    /// | 4 | Gpq, Xmm |
2645    /// +---+----------+
2646    /// ```
2647    #[inline]
2648    pub fn sse_cvtss2si<A, B>(&mut self, op0: A, op1: B)
2649    where Assembler<'a>: SseCvtss2siEmitter<A, B> {
2650        <Self as SseCvtss2siEmitter<A, B>>::sse_cvtss2si(self, op0, op1);
2651    }
2652    /// `SSE_CVTTSS2SI` (CVTTSS2SI). 
2653    /// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
2654    ///
2655    ///
2656    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSS2SI.html).
2657    ///
2658    /// Supported operand variants:
2659    ///
2660    /// ```text
2661    /// +---+----------+
2662    /// | # | Operands |
2663    /// +---+----------+
2664    /// | 1 | Gpd, Mem |
2665    /// | 2 | Gpd, Xmm |
2666    /// | 3 | Gpq, Mem |
2667    /// | 4 | Gpq, Xmm |
2668    /// +---+----------+
2669    /// ```
2670    #[inline]
2671    pub fn sse_cvttss2si<A, B>(&mut self, op0: A, op1: B)
2672    where Assembler<'a>: SseCvttss2siEmitter<A, B> {
2673        <Self as SseCvttss2siEmitter<A, B>>::sse_cvttss2si(self, op0, op1);
2674    }
2675    /// `SSE_DIVPS` (DIVPS). 
2676    /// Performs a SIMD divide of the four, eight or sixteen packed single precision floating-point values in the first source operand (the second operand) by the four, eight or sixteen packed single precision floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
2677    ///
2678    ///
2679    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPS.html).
2680    ///
2681    /// Supported operand variants:
2682    ///
2683    /// ```text
2684    /// +---+----------+
2685    /// | # | Operands |
2686    /// +---+----------+
2687    /// | 1 | Xmm, Mem |
2688    /// | 2 | Xmm, Xmm |
2689    /// +---+----------+
2690    /// ```
2691    #[inline]
2692    pub fn sse_divps<A, B>(&mut self, op0: A, op1: B)
2693    where Assembler<'a>: SseDivpsEmitter<A, B> {
2694        <Self as SseDivpsEmitter<A, B>>::sse_divps(self, op0, op1);
2695    }
2696    /// `SSE_DIVSS` (DIVSS). 
2697    /// Divides the low single precision floating-point value in the first source operand by the low single precision floating-point value in the second source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location.
2698    ///
2699    ///
2700    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSS.html).
2701    ///
2702    /// Supported operand variants:
2703    ///
2704    /// ```text
2705    /// +---+----------+
2706    /// | # | Operands |
2707    /// +---+----------+
2708    /// | 1 | Xmm, Mem |
2709    /// | 2 | Xmm, Xmm |
2710    /// +---+----------+
2711    /// ```
2712    #[inline]
2713    pub fn sse_divss<A, B>(&mut self, op0: A, op1: B)
2714    where Assembler<'a>: SseDivssEmitter<A, B> {
2715        <Self as SseDivssEmitter<A, B>>::sse_divss(self, op0, op1);
2716    }
2717    /// `SSE_MAXPS` (MAXPS). 
2718    /// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
2719    ///
2720    ///
2721    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPS.html).
2722    ///
2723    /// Supported operand variants:
2724    ///
2725    /// ```text
2726    /// +---+----------+
2727    /// | # | Operands |
2728    /// +---+----------+
2729    /// | 1 | Xmm, Mem |
2730    /// | 2 | Xmm, Xmm |
2731    /// +---+----------+
2732    /// ```
2733    #[inline]
2734    pub fn sse_maxps<A, B>(&mut self, op0: A, op1: B)
2735    where Assembler<'a>: SseMaxpsEmitter<A, B> {
2736        <Self as SseMaxpsEmitter<A, B>>::sse_maxps(self, op0, op1);
2737    }
2738    /// `SSE_MAXSS` (MAXSS). 
2739    /// Compares the low single precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low doubleword of the destination operand.
2740    ///
2741    ///
2742    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSS.html).
2743    ///
2744    /// Supported operand variants:
2745    ///
2746    /// ```text
2747    /// +---+----------+
2748    /// | # | Operands |
2749    /// +---+----------+
2750    /// | 1 | Xmm, Mem |
2751    /// | 2 | Xmm, Xmm |
2752    /// +---+----------+
2753    /// ```
2754    #[inline]
2755    pub fn sse_maxss<A, B>(&mut self, op0: A, op1: B)
2756    where Assembler<'a>: SseMaxssEmitter<A, B> {
2757        <Self as SseMaxssEmitter<A, B>>::sse_maxss(self, op0, op1);
2758    }
2759    /// `SSE_MINPS` (MINPS). 
2760    /// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
2761    ///
2762    ///
2763    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPS.html).
2764    ///
2765    /// Supported operand variants:
2766    ///
2767    /// ```text
2768    /// +---+----------+
2769    /// | # | Operands |
2770    /// +---+----------+
2771    /// | 1 | Xmm, Mem |
2772    /// | 2 | Xmm, Xmm |
2773    /// +---+----------+
2774    /// ```
2775    #[inline]
2776    pub fn sse_minps<A, B>(&mut self, op0: A, op1: B)
2777    where Assembler<'a>: SseMinpsEmitter<A, B> {
2778        <Self as SseMinpsEmitter<A, B>>::sse_minps(self, op0, op1);
2779    }
2780    /// `SSE_MINSS` (MINSS). 
2781    /// Compares the low single precision floating-point values in the first source operand and the second source operand and returns the minimum value to the low doubleword of the destination operand.
2782    ///
2783    ///
2784    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSS.html).
2785    ///
2786    /// Supported operand variants:
2787    ///
2788    /// ```text
2789    /// +---+----------+
2790    /// | # | Operands |
2791    /// +---+----------+
2792    /// | 1 | Xmm, Mem |
2793    /// | 2 | Xmm, Xmm |
2794    /// +---+----------+
2795    /// ```
2796    #[inline]
2797    pub fn sse_minss<A, B>(&mut self, op0: A, op1: B)
2798    where Assembler<'a>: SseMinssEmitter<A, B> {
2799        <Self as SseMinssEmitter<A, B>>::sse_minss(self, op0, op1);
2800    }
2801    /// `SSE_MOVAPS` (MOVAPS). 
2802    /// Moves 4, 8 or 16 single precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from an 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
2803    ///
2804    ///
2805    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPS.html).
2806    ///
2807    /// Supported operand variants:
2808    ///
2809    /// ```text
2810    /// +---+----------+
2811    /// | # | Operands |
2812    /// +---+----------+
2813    /// | 1 | Mem, Xmm |
2814    /// | 2 | Xmm, Mem |
2815    /// | 3 | Xmm, Xmm |
2816    /// +---+----------+
2817    /// ```
2818    #[inline]
2819    pub fn sse_movaps<A, B>(&mut self, op0: A, op1: B)
2820    where Assembler<'a>: SseMovapsEmitter<A, B> {
2821        <Self as SseMovapsEmitter<A, B>>::sse_movaps(self, op0, op1);
2822    }
2823    /// `SSE_MOVHLPS` (MOVHLPS). 
2824    /// This instruction cannot be used for memory to register moves.
2825    ///
2826    ///
2827    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHLPS.html).
2828    ///
2829    /// Supported operand variants:
2830    ///
2831    /// ```text
2832    /// +---+----------+
2833    /// | # | Operands |
2834    /// +---+----------+
2835    /// | 1 | Xmm, Xmm |
2836    /// +---+----------+
2837    /// ```
2838    #[inline]
2839    pub fn sse_movhlps<A, B>(&mut self, op0: A, op1: B)
2840    where Assembler<'a>: SseMovhlpsEmitter<A, B> {
2841        <Self as SseMovhlpsEmitter<A, B>>::sse_movhlps(self, op0, op1);
2842    }
2843    /// `SSE_MOVHPS` (MOVHPS). 
2844    /// This instruction cannot be used for register to register or memory to memory moves.
2845    ///
2846    ///
2847    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPS.html).
2848    ///
2849    /// Supported operand variants:
2850    ///
2851    /// ```text
2852    /// +---+----------+
2853    /// | # | Operands |
2854    /// +---+----------+
2855    /// | 1 | Mem, Xmm |
2856    /// | 2 | Xmm, Mem |
2857    /// +---+----------+
2858    /// ```
2859    #[inline]
2860    pub fn sse_movhps<A, B>(&mut self, op0: A, op1: B)
2861    where Assembler<'a>: SseMovhpsEmitter<A, B> {
2862        <Self as SseMovhpsEmitter<A, B>>::sse_movhps(self, op0, op1);
2863    }
2864    /// `SSE_MOVLHPS` (MOVLHPS). 
2865    /// This instruction cannot be used for memory to register moves.
2866    ///
2867    ///
2868    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLHPS.html).
2869    ///
2870    /// Supported operand variants:
2871    ///
2872    /// ```text
2873    /// +---+----------+
2874    /// | # | Operands |
2875    /// +---+----------+
2876    /// | 1 | Xmm, Xmm |
2877    /// +---+----------+
2878    /// ```
2879    #[inline]
2880    pub fn sse_movlhps<A, B>(&mut self, op0: A, op1: B)
2881    where Assembler<'a>: SseMovlhpsEmitter<A, B> {
2882        <Self as SseMovlhpsEmitter<A, B>>::sse_movlhps(self, op0, op1);
2883    }
2884    /// `SSE_MOVLPS` (MOVLPS). 
2885    /// This instruction cannot be used for register to register or memory to memory moves.
2886    ///
2887    ///
2888    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPS.html).
2889    ///
2890    /// Supported operand variants:
2891    ///
2892    /// ```text
2893    /// +---+----------+
2894    /// | # | Operands |
2895    /// +---+----------+
2896    /// | 1 | Mem, Xmm |
2897    /// | 2 | Xmm, Mem |
2898    /// +---+----------+
2899    /// ```
2900    #[inline]
2901    pub fn sse_movlps<A, B>(&mut self, op0: A, op1: B)
2902    where Assembler<'a>: SseMovlpsEmitter<A, B> {
2903        <Self as SseMovlpsEmitter<A, B>>::sse_movlps(self, op0, op1);
2904    }
2905    /// `SSE_MOVMSKPS` (MOVMSKPS). 
2906    /// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
2907    ///
2908    ///
2909    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
2910    ///
2911    /// Supported operand variants:
2912    ///
2913    /// ```text
2914    /// +---+----------+
2915    /// | # | Operands |
2916    /// +---+----------+
2917    /// | 1 | Gpq, Xmm |
2918    /// +---+----------+
2919    /// ```
2920    #[inline]
2921    pub fn sse_movmskps<A, B>(&mut self, op0: A, op1: B)
2922    where Assembler<'a>: SseMovmskpsEmitter<A, B> {
2923        <Self as SseMovmskpsEmitter<A, B>>::sse_movmskps(self, op0, op1);
2924    }
2925    /// `SSE_MOVNTPS` (MOVNTPS). 
2926    /// Moves the packed single precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed single precision, floating-pointing. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.
2927    ///
2928    ///
2929    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPS.html).
2930    ///
2931    /// Supported operand variants:
2932    ///
2933    /// ```text
2934    /// +---+----------+
2935    /// | # | Operands |
2936    /// +---+----------+
2937    /// | 1 | Mem, Xmm |
2938    /// +---+----------+
2939    /// ```
2940    #[inline]
2941    pub fn sse_movntps<A, B>(&mut self, op0: A, op1: B)
2942    where Assembler<'a>: SseMovntpsEmitter<A, B> {
2943        <Self as SseMovntpsEmitter<A, B>>::sse_movntps(self, op0, op1);
2944    }
2945    /// `SSE_MOVNTSS`.
2946    ///
2947    /// Supported operand variants:
2948    ///
2949    /// ```text
2950    /// +---+----------+
2951    /// | # | Operands |
2952    /// +---+----------+
2953    /// | 1 | Mem, Xmm |
2954    /// +---+----------+
2955    /// ```
2956    #[inline]
2957    pub fn sse_movntss<A, B>(&mut self, op0: A, op1: B)
2958    where Assembler<'a>: SseMovntssEmitter<A, B> {
2959        <Self as SseMovntssEmitter<A, B>>::sse_movntss(self, op0, op1);
2960    }
2961    /// `SSE_MOVSS` (MOVSS). 
2962    /// Moves a scalar single precision floating-point value from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 32-bit memory locations. This instruction can be used to move a single precision floating-point value to and from the low doubleword of an XMM register and a 32-bit memory location, or to move a single precision floating-point value between the low doublewords of two XMM registers. The instruction cannot be used to transfer data between memory locations.
2963    ///
2964    ///
2965    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVSS.html).
2966    ///
2967    /// Supported operand variants:
2968    ///
2969    /// ```text
2970    /// +---+----------+
2971    /// | # | Operands |
2972    /// +---+----------+
2973    /// | 1 | Mem, Xmm |
2974    /// | 2 | Xmm, Mem |
2975    /// | 3 | Xmm, Xmm |
2976    /// +---+----------+
2977    /// ```
2978    #[inline]
2979    pub fn sse_movss<A, B>(&mut self, op0: A, op1: B)
2980    where Assembler<'a>: SseMovssEmitter<A, B> {
2981        <Self as SseMovssEmitter<A, B>>::sse_movss(self, op0, op1);
2982    }
2983    /// `SSE_MOVUPS` (MOVUPS). 
2984    /// Note: VEX.vvvv and EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
2985    ///
2986    ///
2987    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPS.html).
2988    ///
2989    /// Supported operand variants:
2990    ///
2991    /// ```text
2992    /// +---+----------+
2993    /// | # | Operands |
2994    /// +---+----------+
2995    /// | 1 | Mem, Xmm |
2996    /// | 2 | Xmm, Mem |
2997    /// | 3 | Xmm, Xmm |
2998    /// +---+----------+
2999    /// ```
3000    #[inline]
3001    pub fn sse_movups<A, B>(&mut self, op0: A, op1: B)
3002    where Assembler<'a>: SseMovupsEmitter<A, B> {
3003        <Self as SseMovupsEmitter<A, B>>::sse_movups(self, op0, op1);
3004    }
3005    /// `SSE_MULPS` (MULPS). 
3006    /// Multiply the packed single precision floating-point values from the first source operand with the corresponding values in the second source operand, and stores the packed double precision floating-point results in the destination operand.
3007    ///
3008    ///
3009    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPS.html).
3010    ///
3011    /// Supported operand variants:
3012    ///
3013    /// ```text
3014    /// +---+----------+
3015    /// | # | Operands |
3016    /// +---+----------+
3017    /// | 1 | Xmm, Mem |
3018    /// | 2 | Xmm, Xmm |
3019    /// +---+----------+
3020    /// ```
3021    #[inline]
3022    pub fn sse_mulps<A, B>(&mut self, op0: A, op1: B)
3023    where Assembler<'a>: SseMulpsEmitter<A, B> {
3024        <Self as SseMulpsEmitter<A, B>>::sse_mulps(self, op0, op1);
3025    }
3026    /// `SSE_MULSS` (MULSS). 
3027    /// Multiplies the low single precision floating-point value from the second source operand by the low single precision floating-point value in the first source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source operand and the destination operands are XMM registers.
3028    ///
3029    ///
3030    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSS.html).
3031    ///
3032    /// Supported operand variants:
3033    ///
3034    /// ```text
3035    /// +---+----------+
3036    /// | # | Operands |
3037    /// +---+----------+
3038    /// | 1 | Xmm, Mem |
3039    /// | 2 | Xmm, Xmm |
3040    /// +---+----------+
3041    /// ```
3042    #[inline]
3043    pub fn sse_mulss<A, B>(&mut self, op0: A, op1: B)
3044    where Assembler<'a>: SseMulssEmitter<A, B> {
3045        <Self as SseMulssEmitter<A, B>>::sse_mulss(self, op0, op1);
3046    }
3047    /// `SSE_ORPS` (ORPS). 
3048    /// Performs a bitwise logical OR of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
3049    ///
3050    ///
3051    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPS.html).
3052    ///
3053    /// Supported operand variants:
3054    ///
3055    /// ```text
3056    /// +---+----------+
3057    /// | # | Operands |
3058    /// +---+----------+
3059    /// | 1 | Xmm, Mem |
3060    /// | 2 | Xmm, Xmm |
3061    /// +---+----------+
3062    /// ```
3063    #[inline]
3064    pub fn sse_orps<A, B>(&mut self, op0: A, op1: B)
3065    where Assembler<'a>: SseOrpsEmitter<A, B> {
3066        <Self as SseOrpsEmitter<A, B>>::sse_orps(self, op0, op1);
3067    }
3068    /// `SSE_RCPPS` (RCPPS). 
3069    /// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3070    ///
3071    ///
3072    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
3073    ///
3074    /// Supported operand variants:
3075    ///
3076    /// ```text
3077    /// +---+----------+
3078    /// | # | Operands |
3079    /// +---+----------+
3080    /// | 1 | Xmm, Mem |
3081    /// | 2 | Xmm, Xmm |
3082    /// +---+----------+
3083    /// ```
3084    #[inline]
3085    pub fn sse_rcpps<A, B>(&mut self, op0: A, op1: B)
3086    where Assembler<'a>: SseRcppsEmitter<A, B> {
3087        <Self as SseRcppsEmitter<A, B>>::sse_rcpps(self, op0, op1);
3088    }
3089    /// `SSE_RCPSS` (RCPSS). 
3090    /// Computes of an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3091    ///
3092    ///
3093    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
3094    ///
3095    /// Supported operand variants:
3096    ///
3097    /// ```text
3098    /// +---+----------+
3099    /// | # | Operands |
3100    /// +---+----------+
3101    /// | 1 | Xmm, Mem |
3102    /// | 2 | Xmm, Xmm |
3103    /// +---+----------+
3104    /// ```
3105    #[inline]
3106    pub fn sse_rcpss<A, B>(&mut self, op0: A, op1: B)
3107    where Assembler<'a>: SseRcpssEmitter<A, B> {
3108        <Self as SseRcpssEmitter<A, B>>::sse_rcpss(self, op0, op1);
3109    }
3110    /// `SSE_RSQRTPS` (RSQRTPS). 
3111    /// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3112    ///
3113    ///
3114    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
3115    ///
3116    /// Supported operand variants:
3117    ///
3118    /// ```text
3119    /// +---+----------+
3120    /// | # | Operands |
3121    /// +---+----------+
3122    /// | 1 | Xmm, Mem |
3123    /// | 2 | Xmm, Xmm |
3124    /// +---+----------+
3125    /// ```
3126    #[inline]
3127    pub fn sse_rsqrtps<A, B>(&mut self, op0: A, op1: B)
3128    where Assembler<'a>: SseRsqrtpsEmitter<A, B> {
3129        <Self as SseRsqrtpsEmitter<A, B>>::sse_rsqrtps(self, op0, op1);
3130    }
3131    /// `SSE_RSQRTSS` (RSQRTSS). 
3132    /// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3133    ///
3134    ///
3135    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
3136    ///
3137    /// Supported operand variants:
3138    ///
3139    /// ```text
3140    /// +---+----------+
3141    /// | # | Operands |
3142    /// +---+----------+
3143    /// | 1 | Xmm, Mem |
3144    /// | 2 | Xmm, Xmm |
3145    /// +---+----------+
3146    /// ```
3147    #[inline]
3148    pub fn sse_rsqrtss<A, B>(&mut self, op0: A, op1: B)
3149    where Assembler<'a>: SseRsqrtssEmitter<A, B> {
3150        <Self as SseRsqrtssEmitter<A, B>>::sse_rsqrtss(self, op0, op1);
3151    }
3152    /// `SSE_SHUFPS` (SHUFPS). 
3153    /// Selects a single precision floating-point value of an input quadruplet using a two-bit control and move to a designated element of the destination operand. Each 64-bit element-pair of a 128-bit lane of the destination operand is interleaved between the corresponding lane of the first source operand and the second source operand at the granularity 128 bits. Each two bits in the imm8 byte, starting from bit 0, is the select control of the corresponding element of a 128-bit lane of the destination to received the shuffled result of an input quadruplet. The two lower elements of a 128-bit lane in the destination receives shuffle results from the quadruple of the first source operand. The next two elements of the destination receives shuffle results from the quadruple of the second source operand.
3154    ///
3155    ///
3156    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SHUFPS.html).
3157    ///
3158    /// Supported operand variants:
3159    ///
3160    /// ```text
3161    /// +---+---------------+
3162    /// | # | Operands      |
3163    /// +---+---------------+
3164    /// | 1 | Xmm, Mem, Imm |
3165    /// | 2 | Xmm, Xmm, Imm |
3166    /// +---+---------------+
3167    /// ```
3168    #[inline]
3169    pub fn sse_shufps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
3170    where Assembler<'a>: SseShufpsEmitter<A, B, C> {
3171        <Self as SseShufpsEmitter<A, B, C>>::sse_shufps(self, op0, op1, op2);
3172    }
3173    /// `SSE_SQRTPS` (SQRTPS). 
3174    /// Performs a SIMD computation of the square roots of the four, eight or sixteen packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand.
3175    ///
3176    ///
3177    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTPS.html).
3178    ///
3179    /// Supported operand variants:
3180    ///
3181    /// ```text
3182    /// +---+----------+
3183    /// | # | Operands |
3184    /// +---+----------+
3185    /// | 1 | Xmm, Mem |
3186    /// | 2 | Xmm, Xmm |
3187    /// +---+----------+
3188    /// ```
3189    #[inline]
3190    pub fn sse_sqrtps<A, B>(&mut self, op0: A, op1: B)
3191    where Assembler<'a>: SseSqrtpsEmitter<A, B> {
3192        <Self as SseSqrtpsEmitter<A, B>>::sse_sqrtps(self, op0, op1);
3193    }
3194    /// `SSE_SQRTSS` (SQRTSS). 
3195    /// Computes the square root of the low single precision floating-point value in the second source operand and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source and destination operands is an XMM register.
3196    ///
3197    ///
3198    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTSS.html).
3199    ///
3200    /// Supported operand variants:
3201    ///
3202    /// ```text
3203    /// +---+----------+
3204    /// | # | Operands |
3205    /// +---+----------+
3206    /// | 1 | Xmm, Mem |
3207    /// | 2 | Xmm, Xmm |
3208    /// +---+----------+
3209    /// ```
3210    #[inline]
3211    pub fn sse_sqrtss<A, B>(&mut self, op0: A, op1: B)
3212    where Assembler<'a>: SseSqrtssEmitter<A, B> {
3213        <Self as SseSqrtssEmitter<A, B>>::sse_sqrtss(self, op0, op1);
3214    }
3215    /// `SSE_SUBPS` (SUBPS). 
3216    /// Performs a SIMD subtract of the packed single precision floating-point values in the second Source operand from the First Source operand, and stores the packed single precision floating-point results in the destination operand.
3217    ///
3218    ///
3219    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBPS.html).
3220    ///
3221    /// Supported operand variants:
3222    ///
3223    /// ```text
3224    /// +---+----------+
3225    /// | # | Operands |
3226    /// +---+----------+
3227    /// | 1 | Xmm, Mem |
3228    /// | 2 | Xmm, Xmm |
3229    /// +---+----------+
3230    /// ```
3231    #[inline]
3232    pub fn sse_subps<A, B>(&mut self, op0: A, op1: B)
3233    where Assembler<'a>: SseSubpsEmitter<A, B> {
3234        <Self as SseSubpsEmitter<A, B>>::sse_subps(self, op0, op1);
3235    }
3236    /// `SSE_SUBSS` (SUBSS). 
3237    /// Subtract the low single precision floating-point value from the second source operand and the first source operand and store the double precision floating-point result in the low doubleword of the destination operand.
3238    ///
3239    ///
3240    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBSS.html).
3241    ///
3242    /// Supported operand variants:
3243    ///
3244    /// ```text
3245    /// +---+----------+
3246    /// | # | Operands |
3247    /// +---+----------+
3248    /// | 1 | Xmm, Mem |
3249    /// | 2 | Xmm, Xmm |
3250    /// +---+----------+
3251    /// ```
3252    #[inline]
3253    pub fn sse_subss<A, B>(&mut self, op0: A, op1: B)
3254    where Assembler<'a>: SseSubssEmitter<A, B> {
3255        <Self as SseSubssEmitter<A, B>>::sse_subss(self, op0, op1);
3256    }
3257    /// `SSE_UCOMISS` (UCOMISS). 
3258    /// Compares the single precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
3259    ///
3260    ///
3261    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UCOMISS.html).
3262    ///
3263    /// Supported operand variants:
3264    ///
3265    /// ```text
3266    /// +---+----------+
3267    /// | # | Operands |
3268    /// +---+----------+
3269    /// | 1 | Xmm, Mem |
3270    /// | 2 | Xmm, Xmm |
3271    /// +---+----------+
3272    /// ```
3273    #[inline]
3274    pub fn sse_ucomiss<A, B>(&mut self, op0: A, op1: B)
3275    where Assembler<'a>: SseUcomissEmitter<A, B> {
3276        <Self as SseUcomissEmitter<A, B>>::sse_ucomiss(self, op0, op1);
3277    }
3278    /// `SSE_UNPCKHPS` (UNPCKHPS). 
3279    /// Performs an interleaved unpack of the high single precision floating-point values from the first source operand and the second source operand.
3280    ///
3281    ///
3282    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPS.html).
3283    ///
3284    /// Supported operand variants:
3285    ///
3286    /// ```text
3287    /// +---+----------+
3288    /// | # | Operands |
3289    /// +---+----------+
3290    /// | 1 | Xmm, Mem |
3291    /// | 2 | Xmm, Xmm |
3292    /// +---+----------+
3293    /// ```
3294    #[inline]
3295    pub fn sse_unpckhps<A, B>(&mut self, op0: A, op1: B)
3296    where Assembler<'a>: SseUnpckhpsEmitter<A, B> {
3297        <Self as SseUnpckhpsEmitter<A, B>>::sse_unpckhps(self, op0, op1);
3298    }
3299    /// `SSE_UNPCKLPS` (UNPCKLPS). 
3300    /// Performs an interleaved unpack of the low single precision floating-point values from the first source operand and the second source operand.
3301    ///
3302    ///
3303    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPS.html).
3304    ///
3305    /// Supported operand variants:
3306    ///
3307    /// ```text
3308    /// +---+----------+
3309    /// | # | Operands |
3310    /// +---+----------+
3311    /// | 1 | Xmm, Mem |
3312    /// | 2 | Xmm, Xmm |
3313    /// +---+----------+
3314    /// ```
3315    #[inline]
3316    pub fn sse_unpcklps<A, B>(&mut self, op0: A, op1: B)
3317    where Assembler<'a>: SseUnpcklpsEmitter<A, B> {
3318        <Self as SseUnpcklpsEmitter<A, B>>::sse_unpcklps(self, op0, op1);
3319    }
3320    /// `SSE_XORPS` (XORPS). 
3321    /// Performs a bitwise logical XOR of the four, eight or sixteen packed single-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
3322    ///
3323    ///
3324    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/XORPS.html).
3325    ///
3326    /// Supported operand variants:
3327    ///
3328    /// ```text
3329    /// +---+----------+
3330    /// | # | Operands |
3331    /// +---+----------+
3332    /// | 1 | Xmm, Mem |
3333    /// | 2 | Xmm, Xmm |
3334    /// +---+----------+
3335    /// ```
3336    #[inline]
3337    pub fn sse_xorps<A, B>(&mut self, op0: A, op1: B)
3338    where Assembler<'a>: SseXorpsEmitter<A, B> {
3339        <Self as SseXorpsEmitter<A, B>>::sse_xorps(self, op0, op1);
3340    }
3341    /// `STMXCSR` (STMXCSR). 
3342    /// Stores the contents of the MXCSR control and status register to the destination operand. The destination operand is a 32-bit memory location. The reserved bits in the MXCSR register are stored as 0s.
3343    ///
3344    ///
3345    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
3346    ///
3347    /// Supported operand variants:
3348    ///
3349    /// ```text
3350    /// +---+----------+
3351    /// | # | Operands |
3352    /// +---+----------+
3353    /// | 1 | Mem      |
3354    /// +---+----------+
3355    /// ```
3356    #[inline]
3357    pub fn stmxcsr<A>(&mut self, op0: A)
3358    where Assembler<'a>: StmxcsrEmitter<A> {
3359        <Self as StmxcsrEmitter<A>>::stmxcsr(self, op0);
3360    }
3361}