asmkit/x86/features/SSE.rs
1use crate::x86::assembler::*;
2use crate::x86::operands::*;
3use super::super::opcodes::*;
4use crate::core::emitter::*;
5use crate::core::operand::*;
6
7/// A dummy operand that represents no register. Here just for simplicity.
8const NOREG: Operand = Operand::new();
9
/// Emitter for `LDMXCSR` (LDMXCSR).
///
/// Loads the source operand, a 32-bit memory location, into the MXCSR
/// control/status register. The MXCSR register and its contents are described
/// under "MXCSR Control and Status Register" in Chapter 10 of the Intel® 64 and
/// IA-32 Architectures Software Developer's Manual, Volume 1.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait LdmxcsrEmitter<Op0> {
    /// Emits `LDMXCSR` with `op0` as its single operand.
    fn ldmxcsr(&mut self, op0: Op0);
}
28
29impl<'a> LdmxcsrEmitter<Mem> for Assembler<'a> {
30 fn ldmxcsr(&mut self, op0: Mem) {
31 self.emit(LDMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
32 }
33}
34
/// Emitter for `MMX_MASKMOVQ` (MASKMOVQ).
///
/// Byte-masked store of an MMX register: selected bytes of the source operand
/// (first operand) are written to a 64-bit memory location, and the mask
/// operand (second operand) chooses which bytes are stored. Both operands are
/// MMX technology registers. The memory location is given by the effective
/// address in the DI/EDI/RDI register (default segment DS, overridable with a
/// segment-override prefix) and does not need to be naturally aligned. The
/// size of the store address depends on the address-size attribute.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/MASKMOVQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxMaskmovqEmitter<Op0, Op1> {
    /// Emits `MASKMOVQ` with `op0` as the source and `op1` as the byte mask.
    fn mmx_maskmovq(&mut self, op0: Op0, op1: Op1);
}
53
54impl<'a> MmxMaskmovqEmitter<Mm, Mm> for Assembler<'a> {
55 fn mmx_maskmovq(&mut self, op0: Mm, op1: Mm) {
56 self.emit(MMX_MASKMOVQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
57 }
58}
59
/// `MMX_MOVDQ2Q` (MOVDQ2Q).
/// Moves the low quadword from the source operand (second operand) to the destination operand (first operand). The source operand is an XMM register and the destination operand is an MMX technology register.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVDQ2Q.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Xmm  |
/// +---+----------+
/// ```
pub trait MmxMovdq2qEmitter<A, B> {
    /// Emits `MOVDQ2Q` with `op0` as the destination and `op1` as the source.
    fn mmx_movdq2q(&mut self, op0: A, op1: B);
}
74
75impl<'a> MmxMovdq2qEmitter<Mm, Xmm> for Assembler<'a> {
76 fn mmx_movdq2q(&mut self, op0: Mm, op1: Xmm) {
77 self.emit(MMX_MOVDQ2QRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
78 }
79}
80
/// Emitter for `MMX_MOVNTQ` (MOVNTQ).
///
/// Stores the quadword in the source operand (second operand) to the
/// destination operand (first operand) using a non-temporal hint, which
/// minimizes cache pollution during the write to memory. The source is an MMX
/// technology register assumed to contain packed integer data (packed bytes,
/// words, or doublewords); the destination is a 64-bit memory location.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/MOVNTQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem, Mm  |
/// +---+----------+
/// ```
pub trait MmxMovntqEmitter<Op0, Op1> {
    /// Emits `MOVNTQ` with `op0` as the destination and `op1` as the source.
    fn mmx_movntq(&mut self, op0: Op0, op1: Op1);
}
99
100impl<'a> MmxMovntqEmitter<Mem, Mm> for Assembler<'a> {
101 fn mmx_movntq(&mut self, op0: Mem, op1: Mm) {
102 self.emit(MMX_MOVNTQMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
103 }
104}
105
/// Emitter for `MMX_MOVQ2DQ` (MOVQ2DQ).
///
/// Copies the quadword in the source operand (second operand, an MMX
/// technology register) into the low quadword of the destination operand
/// (first operand, an XMM register).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/MOVQ2DQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mm  |
/// +---+----------+
/// ```
pub trait MmxMovq2dqEmitter<Op0, Op1> {
    /// Emits `MOVQ2DQ` with `op0` as the destination and `op1` as the source.
    fn mmx_movq2dq(&mut self, op0: Op0, op1: Op1);
}
124
125impl<'a> MmxMovq2dqEmitter<Xmm, Mm> for Assembler<'a> {
126 fn mmx_movq2dq(&mut self, op0: Xmm, op1: Mm) {
127 self.emit(MMX_MOVQ2DQRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
128 }
129}
130
/// Emitter for `MMX_PAVGB` (PAVGB).
///
/// SIMD average of packed unsigned integers. For each pair of corresponding
/// elements in the destination operand (first operand) and the source operand
/// (second operand), the elements are added, 1 is added to the temporary sum,
/// and the result is shifted right by one bit. The results are stored in the
/// destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPavgbEmitter<Op0, Op1> {
    /// Emits `PAVGB` with `op0` as the destination and `op1` as the source.
    fn mmx_pavgb(&mut self, op0: Op0, op1: Op1);
}
150
151impl<'a> MmxPavgbEmitter<Mm, Mm> for Assembler<'a> {
152 fn mmx_pavgb(&mut self, op0: Mm, op1: Mm) {
153 self.emit(MMX_PAVGBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
154 }
155}
156
157impl<'a> MmxPavgbEmitter<Mm, Mem> for Assembler<'a> {
158 fn mmx_pavgb(&mut self, op0: Mm, op1: Mem) {
159 self.emit(MMX_PAVGBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
160 }
161}
162
/// Emitter for `MMX_PAVGW` (PAVGW).
///
/// SIMD average of packed unsigned integers. For each pair of corresponding
/// elements in the destination operand (first operand) and the source operand
/// (second operand), the elements are added, 1 is added to the temporary sum,
/// and the result is shifted right by one bit. The results are stored in the
/// destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPavgwEmitter<Op0, Op1> {
    /// Emits `PAVGW` with `op0` as the destination and `op1` as the source.
    fn mmx_pavgw(&mut self, op0: Op0, op1: Op1);
}
182
183impl<'a> MmxPavgwEmitter<Mm, Mm> for Assembler<'a> {
184 fn mmx_pavgw(&mut self, op0: Mm, op1: Mm) {
185 self.emit(MMX_PAVGWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
186 }
187}
188
189impl<'a> MmxPavgwEmitter<Mm, Mem> for Assembler<'a> {
190 fn mmx_pavgw(&mut self, op0: Mm, op1: Mem) {
191 self.emit(MMX_PAVGWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
192 }
193}
194
/// Emitter for `MMX_PEXTRW` (PEXTRW).
///
/// Extracts a word: the word of the source operand (second operand) selected
/// by the count operand (third operand, an 8-bit immediate) is copied to the
/// destination operand (first operand). Per the manual, the source can be an
/// MMX technology register or an XMM register and the destination can be the
/// low word of a general-purpose register or a 16-bit memory address. For an
/// MMX source the 2 least-significant bits of the count select the word; for
/// an XMM source the 3 least-significant bits do. The content of the
/// destination register above bit 16 is cleared (set to all 0s).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------+
/// | # | Operands     |
/// +---+--------------+
/// | 1 | Gpq, Mm, Imm |
/// +---+--------------+
/// ```
pub trait MmxPextrwEmitter<Op0, Op1, Op2> {
    /// Emits `PEXTRW` with destination `op0`, source `op1` and count `op2`.
    fn mmx_pextrw(&mut self, op0: Op0, op1: Op1, op2: Op2);
}
213
214impl<'a> MmxPextrwEmitter<Gpq, Mm, Imm> for Assembler<'a> {
215 fn mmx_pextrw(&mut self, op0: Gpq, op1: Mm, op2: Imm) {
216 self.emit(MMX_PEXTRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
217 }
218}
219
/// `MMX_PINSRW` (PINSRW).
/// Copies a word from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). For the variants listed below, the destination is an MMX technology register, the source is a general-purpose register or a memory location, and the count is an immediate.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------+
/// | # | Operands     |
/// +---+--------------+
/// | 1 | Mm, Gpd, Imm |
/// | 2 | Mm, Mem, Imm |
/// +---+--------------+
/// ```
pub trait MmxPinsrwEmitter<A, B, C> {
    /// Emits `PINSRW` with destination `op0`, source `op1` and count `op2`.
    fn mmx_pinsrw(&mut self, op0: A, op1: B, op2: C);
}
239
240impl<'a> MmxPinsrwEmitter<Mm, Gpd, Imm> for Assembler<'a> {
241 fn mmx_pinsrw(&mut self, op0: Mm, op1: Gpd, op2: Imm) {
242 self.emit(MMX_PINSRWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
243 }
244}
245
246impl<'a> MmxPinsrwEmitter<Mm, Mem, Imm> for Assembler<'a> {
247 fn mmx_pinsrw(&mut self, op0: Mm, op1: Mem, op2: Imm) {
248 self.emit(MMX_PINSRWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
249 }
250}
251
/// Emitter for `MMX_PMAXSW` (PMAXSW).
///
/// SIMD compare of packed signed integers: the integers in the second source
/// operand and the first source operand are compared pairwise and the maximum
/// of each pair is returned to the destination operand. The linked manual page
/// documents the byte, word, dword and qword forms together.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPmaxswEmitter<Op0, Op1> {
    /// Emits `PMAXSW` with `op0` as the destination and `op1` as the source.
    fn mmx_pmaxsw(&mut self, op0: Op0, op1: Op1);
}
271
272impl<'a> MmxPmaxswEmitter<Mm, Mm> for Assembler<'a> {
273 fn mmx_pmaxsw(&mut self, op0: Mm, op1: Mm) {
274 self.emit(MMX_PMAXSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
275 }
276}
277
278impl<'a> MmxPmaxswEmitter<Mm, Mem> for Assembler<'a> {
279 fn mmx_pmaxsw(&mut self, op0: Mm, op1: Mem) {
280 self.emit(MMX_PMAXSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
281 }
282}
283
/// Emitter for `MMX_PMAXUB` (PMAXUB).
///
/// SIMD compare of packed unsigned integers: the integers in the second source
/// operand and the first source operand are compared pairwise and the maximum
/// of each pair is returned to the destination operand. The linked manual page
/// documents the byte and word forms together.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPmaxubEmitter<Op0, Op1> {
    /// Emits `PMAXUB` with `op0` as the destination and `op1` as the source.
    fn mmx_pmaxub(&mut self, op0: Op0, op1: Op1);
}
303
304impl<'a> MmxPmaxubEmitter<Mm, Mm> for Assembler<'a> {
305 fn mmx_pmaxub(&mut self, op0: Mm, op1: Mm) {
306 self.emit(MMX_PMAXUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
307 }
308}
309
310impl<'a> MmxPmaxubEmitter<Mm, Mem> for Assembler<'a> {
311 fn mmx_pmaxub(&mut self, op0: Mm, op1: Mem) {
312 self.emit(MMX_PMAXUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
313 }
314}
315
/// Emitter for `MMX_PMINSW` (PMINSW).
///
/// SIMD compare of packed signed integers: the integers in the second source
/// operand and the first source operand are compared pairwise and the minimum
/// of each pair is returned to the destination operand. The manual text is
/// shared between the byte, word, and dword element sizes.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPminswEmitter<Op0, Op1> {
    /// Emits `PMINSW` with `op0` as the destination and `op1` as the source.
    fn mmx_pminsw(&mut self, op0: Op0, op1: Op1);
}
335
336impl<'a> MmxPminswEmitter<Mm, Mm> for Assembler<'a> {
337 fn mmx_pminsw(&mut self, op0: Mm, op1: Mm) {
338 self.emit(MMX_PMINSWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
339 }
340}
341
342impl<'a> MmxPminswEmitter<Mm, Mem> for Assembler<'a> {
343 fn mmx_pminsw(&mut self, op0: Mm, op1: Mem) {
344 self.emit(MMX_PMINSWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
345 }
346}
347
/// Emitter for `MMX_PMINUB` (PMINUB).
///
/// SIMD compare of packed unsigned integers: the integers in the second source
/// operand and the first source operand are compared pairwise and the minimum
/// of each pair is returned to the destination operand. The linked manual page
/// documents the byte and word forms together.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPminubEmitter<Op0, Op1> {
    /// Emits `PMINUB` with `op0` as the destination and `op1` as the source.
    fn mmx_pminub(&mut self, op0: Op0, op1: Op1);
}
367
368impl<'a> MmxPminubEmitter<Mm, Mm> for Assembler<'a> {
369 fn mmx_pminub(&mut self, op0: Mm, op1: Mm) {
370 self.emit(MMX_PMINUBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
371 }
372}
373
374impl<'a> MmxPminubEmitter<Mm, Mem> for Assembler<'a> {
375 fn mmx_pminub(&mut self, op0: Mm, op1: Mem) {
376 self.emit(MMX_PMINUBRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
377 }
378}
379
/// Emitter for `MMX_PMOVMSKB` (PMOVMSKB).
///
/// Builds a mask from the most significant bit of each byte of the source
/// operand (second operand) and stores it in the low byte or word of the
/// destination operand (first operand).
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Gpq, Mm  |
/// +---+----------+
/// ```
pub trait MmxPmovmskbEmitter<Op0, Op1> {
    /// Emits `PMOVMSKB` with `op0` as the destination and `op1` as the source.
    fn mmx_pmovmskb(&mut self, op0: Op0, op1: Op1);
}
398
399impl<'a> MmxPmovmskbEmitter<Gpq, Mm> for Assembler<'a> {
400 fn mmx_pmovmskb(&mut self, op0: Gpq, op1: Mm) {
401 self.emit(MMX_PMOVMSKBRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
402 }
403}
404
/// Emitter for `MMX_PMULHUW` (PMULHUW).
///
/// SIMD unsigned multiply of the packed unsigned word integers in the
/// destination operand (first operand) and the source operand (second
/// operand). The high 16 bits of each 32-bit intermediate result are stored in
/// the destination operand. (Figure 4-12 of the manual shows this operation
/// for 64-bit operands.)
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPmulhuwEmitter<Op0, Op1> {
    /// Emits `PMULHUW` with `op0` as the destination and `op1` as the source.
    fn mmx_pmulhuw(&mut self, op0: Op0, op1: Op1);
}
424
425impl<'a> MmxPmulhuwEmitter<Mm, Mm> for Assembler<'a> {
426 fn mmx_pmulhuw(&mut self, op0: Mm, op1: Mm) {
427 self.emit(MMX_PMULHUWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
428 }
429}
430
431impl<'a> MmxPmulhuwEmitter<Mm, Mem> for Assembler<'a> {
432 fn mmx_pmulhuw(&mut self, op0: Mm, op1: Mem) {
433 self.emit(MMX_PMULHUWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
434 }
435}
436
/// Emitter for `MMX_PSADBW` (PSADBW).
///
/// Computes the absolute value of the difference of 8 unsigned byte integers
/// from the source operand (second operand) and from the destination operand
/// (first operand), then sums those 8 differences into an unsigned word
/// integer result that is stored in the destination operand. Figure 4-14 of
/// the manual shows the operation for 64-bit operands.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mm, Mem  |
/// | 2 | Mm, Mm   |
/// +---+----------+
/// ```
pub trait MmxPsadbwEmitter<Op0, Op1> {
    /// Emits `PSADBW` with `op0` as the destination and `op1` as the source.
    fn mmx_psadbw(&mut self, op0: Op0, op1: Op1);
}
456
457impl<'a> MmxPsadbwEmitter<Mm, Mm> for Assembler<'a> {
458 fn mmx_psadbw(&mut self, op0: Mm, op1: Mm) {
459 self.emit(MMX_PSADBWRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
460 }
461}
462
463impl<'a> MmxPsadbwEmitter<Mm, Mem> for Assembler<'a> {
464 fn mmx_psadbw(&mut self, op0: Mm, op1: Mem) {
465 self.emit(MMX_PSADBWRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
466 }
467}
468
/// Emitter for `MMX_PSHUFW` (PSHUFW).
///
/// Shuffles words: copies words from the source operand (second operand) into
/// the destination operand (first operand) at word locations selected with the
/// order operand (third operand). Each 2-bit field of the order operand
/// selects the contents of one word location in the destination, and the
/// field's encoding picks which source word is copied there. The operation is
/// similar to that of PSHUFD, illustrated in Figure 4-16 of the manual.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PSHUFW.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------+
/// | # | Operands     |
/// +---+--------------+
/// | 1 | Mm, Mem, Imm |
/// | 2 | Mm, Mm, Imm  |
/// +---+--------------+
/// ```
pub trait MmxPshufwEmitter<Op0, Op1, Op2> {
    /// Emits `PSHUFW` with destination `op0`, source `op1` and order `op2`.
    fn mmx_pshufw(&mut self, op0: Op0, op1: Op1, op2: Op2);
}
488
489impl<'a> MmxPshufwEmitter<Mm, Mm, Imm> for Assembler<'a> {
490 fn mmx_pshufw(&mut self, op0: Mm, op1: Mm, op2: Imm) {
491 self.emit(MMX_PSHUFWRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
492 }
493}
494
495impl<'a> MmxPshufwEmitter<Mm, Mem, Imm> for Assembler<'a> {
496 fn mmx_pshufw(&mut self, op0: Mm, op1: Mem, op2: Imm) {
497 self.emit(MMX_PSHUFWRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
498 }
499}
500
/// Emitter for `PREFETCHNTA` (PREFETCHNTA).
///
/// Fetches the line of data from memory that contains the byte specified with
/// the source operand to a location in the cache hierarchy specified by a
/// locality hint. `PREFETCHNTA` is one of the `PREFETCHh` variants documented
/// on the linked page.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait PrefetchntaEmitter<Op0> {
    /// Emits `PREFETCHNTA` for the address given by `op0`.
    fn prefetchnta(&mut self, op0: Op0);
}
519
520impl<'a> PrefetchntaEmitter<Mem> for Assembler<'a> {
521 fn prefetchnta(&mut self, op0: Mem) {
522 self.emit(PREFETCHNTAM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
523 }
524}
525
/// Emitter for `PREFETCHT0` (PREFETCHT0).
///
/// Fetches the line of data from memory that contains the byte specified with
/// the source operand to a location in the cache hierarchy specified by a
/// locality hint. `PREFETCHT0` is one of the `PREFETCHh` variants documented
/// on the linked page.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait Prefetcht0Emitter<Op0> {
    /// Emits `PREFETCHT0` for the address given by `op0`.
    fn prefetcht0(&mut self, op0: Op0);
}
544
545impl<'a> Prefetcht0Emitter<Mem> for Assembler<'a> {
546 fn prefetcht0(&mut self, op0: Mem) {
547 self.emit(PREFETCHT0M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
548 }
549}
550
/// Emitter for `PREFETCHT1` (PREFETCHT1).
///
/// Fetches the line of data from memory that contains the byte specified with
/// the source operand to a location in the cache hierarchy specified by a
/// locality hint. `PREFETCHT1` is one of the `PREFETCHh` variants documented
/// on the linked page.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait Prefetcht1Emitter<Op0> {
    /// Emits `PREFETCHT1` for the address given by `op0`.
    fn prefetcht1(&mut self, op0: Op0);
}
569
570impl<'a> Prefetcht1Emitter<Mem> for Assembler<'a> {
571 fn prefetcht1(&mut self, op0: Mem) {
572 self.emit(PREFETCHT1M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
573 }
574}
575
/// Emitter for `PREFETCHT2` (PREFETCHT2).
///
/// Fetches the line of data from memory that contains the byte specified with
/// the source operand to a location in the cache hierarchy specified by a
/// locality hint. `PREFETCHT2` is one of the `PREFETCHh` variants documented
/// on the linked page.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait Prefetcht2Emitter<Op0> {
    /// Emits `PREFETCHT2` for the address given by `op0`.
    fn prefetcht2(&mut self, op0: Op0);
}
594
595impl<'a> Prefetcht2Emitter<Mem> for Assembler<'a> {
596 fn prefetcht2(&mut self, op0: Mem) {
597 self.emit(PREFETCHT2M, op0.as_operand(), &NOREG, &NOREG, &NOREG);
598 }
599}
600
/// Emitter for `SFENCE` (SFENCE).
///
/// Store fence. Orders processor execution relative to all memory stores prior
/// to the SFENCE instruction: every store before SFENCE is globally visible
/// before any store after SFENCE becomes globally visible. SFENCE is ordered
/// with respect to memory stores, other SFENCE instructions, MFENCE
/// instructions, and any serializing instructions (such as CPUID). It is not
/// ordered with respect to memory loads or the LFENCE instruction.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/SFENCE.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | (none)   |
/// +---+----------+
/// ```
pub trait SfenceEmitter {
    /// Emits `SFENCE`; the instruction takes no operands.
    fn sfence(&mut self);
}
619
620impl<'a> SfenceEmitter for Assembler<'a> {
621 fn sfence(&mut self) {
622 self.emit(SFENCE, &NOREG, &NOREG, &NOREG, &NOREG);
623 }
624}
625
/// Emitter for `SSE_ADDPS` (ADDPS).
///
/// Adds four, eight or sixteen packed single precision floating-point values
/// from the first source operand with the second source operand and stores
/// the packed single precision floating-point result in the destination
/// operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/ADDPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAddpsEmitter<Op0, Op1> {
    /// Emits `ADDPS` with `op0` as the destination and `op1` as the source.
    fn sse_addps(&mut self, op0: Op0, op1: Op1);
}
645
646impl<'a> SseAddpsEmitter<Xmm, Xmm> for Assembler<'a> {
647 fn sse_addps(&mut self, op0: Xmm, op1: Xmm) {
648 self.emit(SSE_ADDPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
649 }
650}
651
652impl<'a> SseAddpsEmitter<Xmm, Mem> for Assembler<'a> {
653 fn sse_addps(&mut self, op0: Xmm, op1: Mem) {
654 self.emit(SSE_ADDPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
655 }
656}
657
/// `SSE_ADDSS` (ADDSS).
/// Adds the low single precision floating-point values from the second source operand and the first source operand, and stores the single precision floating-point result in the destination operand.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAddssEmitter<A, B> {
    /// Emits `ADDSS` with `op0` as the destination and `op1` as the source.
    fn sse_addss(&mut self, op0: A, op1: B);
}
677
678impl<'a> SseAddssEmitter<Xmm, Xmm> for Assembler<'a> {
679 fn sse_addss(&mut self, op0: Xmm, op1: Xmm) {
680 self.emit(SSE_ADDSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
681 }
682}
683
684impl<'a> SseAddssEmitter<Xmm, Mem> for Assembler<'a> {
685 fn sse_addss(&mut self, op0: Xmm, op1: Mem) {
686 self.emit(SSE_ADDSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
687 }
688}
689
/// Emitter for `SSE_ANDNPS` (ANDNPS).
///
/// Bitwise logical AND NOT of the four, eight or sixteen packed single
/// precision floating-point values from the first source operand and the
/// second source operand; the result is stored in the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/ANDNPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAndnpsEmitter<Op0, Op1> {
    /// Emits `ANDNPS` with `op0` as the destination and `op1` as the source.
    fn sse_andnps(&mut self, op0: Op0, op1: Op1);
}
709
710impl<'a> SseAndnpsEmitter<Xmm, Xmm> for Assembler<'a> {
711 fn sse_andnps(&mut self, op0: Xmm, op1: Xmm) {
712 self.emit(SSE_ANDNPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
713 }
714}
715
716impl<'a> SseAndnpsEmitter<Xmm, Mem> for Assembler<'a> {
717 fn sse_andnps(&mut self, op0: Xmm, op1: Mem) {
718 self.emit(SSE_ANDNPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
719 }
720}
721
/// Emitter for `SSE_ANDPS` (ANDPS).
///
/// Bitwise logical AND of the four, eight or sixteen packed single precision
/// floating-point values from the first source operand and the second source
/// operand; the result is stored in the destination operand.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/ANDPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseAndpsEmitter<Op0, Op1> {
    /// Emits `ANDPS` with `op0` as the destination and `op1` as the source.
    fn sse_andps(&mut self, op0: Op0, op1: Op1);
}
741
742impl<'a> SseAndpsEmitter<Xmm, Xmm> for Assembler<'a> {
743 fn sse_andps(&mut self, op0: Xmm, op1: Xmm) {
744 self.emit(SSE_ANDPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
745 }
746}
747
748impl<'a> SseAndpsEmitter<Xmm, Mem> for Assembler<'a> {
749 fn sse_andps(&mut self, op0: Xmm, op1: Mem) {
750 self.emit(SSE_ANDPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
751 }
752}
753
/// Emitter for `SSE_CMPPS` (CMPPS).
///
/// SIMD compare of the packed single precision floating-point values in the
/// second source operand and the first source operand; the result of the
/// comparison is returned to the destination operand. The comparison
/// predicate operand (an immediate byte) specifies the type of comparison
/// performed on each pair of packed values.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseCmppsEmitter<Op0, Op1, Op2> {
    /// Emits `CMPPS` with destination `op0`, source `op1` and predicate `op2`.
    fn sse_cmpps(&mut self, op0: Op0, op1: Op1, op2: Op2);
}
773
774impl<'a> SseCmppsEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
775 fn sse_cmpps(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
776 self.emit(SSE_CMPPSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
777 }
778}
779
780impl<'a> SseCmppsEmitter<Xmm, Mem, Imm> for Assembler<'a> {
781 fn sse_cmpps(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
782 self.emit(SSE_CMPPSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
783 }
784}
785
/// Emitter for `SSE_CMPSS` (CMPSS).
///
/// Compares the low single precision floating-point values in the second
/// source operand and the first source operand and returns the result of the
/// comparison to the destination operand. The comparison predicate operand
/// (an immediate operand) specifies the type of comparison performed.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseCmpssEmitter<Op0, Op1, Op2> {
    /// Emits `CMPSS` with destination `op0`, source `op1` and predicate `op2`.
    fn sse_cmpss(&mut self, op0: Op0, op1: Op1, op2: Op2);
}
805
806impl<'a> SseCmpssEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
807 fn sse_cmpss(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
808 self.emit(SSE_CMPSSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
809 }
810}
811
812impl<'a> SseCmpssEmitter<Xmm, Mem, Imm> for Assembler<'a> {
813 fn sse_cmpss(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
814 self.emit(SSE_CMPSSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
815 }
816}
817
/// `SSE_COMISS` (COMISS).
/// Compares the single precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/COMISS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseComissEmitter<A, B> {
    /// Emits `COMISS` comparing `op0` (operand 1) with `op1` (operand 2).
    fn sse_comiss(&mut self, op0: A, op1: B);
}
837
838impl<'a> SseComissEmitter<Xmm, Xmm> for Assembler<'a> {
839 fn sse_comiss(&mut self, op0: Xmm, op1: Xmm) {
840 self.emit(SSE_COMISSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
841 }
842}
843
844impl<'a> SseComissEmitter<Xmm, Mem> for Assembler<'a> {
845 fn sse_comiss(&mut self, op0: Xmm, op1: Mem) {
846 self.emit(SSE_COMISSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
847 }
848}
849
/// Emitter for `SSE_CVTSI2SS` (CVTSI2SS).
///
/// Converts a signed doubleword integer (or a signed quadword integer if the
/// operand size is 64 bits) in the "convert-from" source operand to a single
/// precision floating-point value in the destination operand (first operand).
/// The source can be a general-purpose register or a memory location; the
/// destination is an XMM register. The result is stored in the low doubleword
/// of the destination and the upper three doublewords are left unchanged. An
/// inexact conversion is rounded according to the rounding control bits in
/// the MXCSR register or the embedded rounding control bits.
///
/// Reference: [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Gpd |
/// | 2 | Xmm, Gpq |
/// | 3 | Xmm, Mem |
/// +---+----------+
/// ```
pub trait SseCvtsi2ssEmitter<Op0, Op1> {
    /// Emits `CVTSI2SS` with `op0` as the destination and `op1` as the integer source.
    fn sse_cvtsi2ss(&mut self, op0: Op0, op1: Op1);
}
870
871impl<'a> SseCvtsi2ssEmitter<Xmm, Gpd> for Assembler<'a> {
872 fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Gpd) {
873 self.emit(SSE_CVTSI2SS32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
874 }
875}
876
877impl<'a> SseCvtsi2ssEmitter<Xmm, Mem> for Assembler<'a> {
878 fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Mem) {
879 self.emit(SSE_CVTSI2SS32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
880 }
881}
882
883impl<'a> SseCvtsi2ssEmitter<Xmm, Gpq> for Assembler<'a> {
884 fn sse_cvtsi2ss(&mut self, op0: Xmm, op1: Gpq) {
885 self.emit(SSE_CVTSI2SS64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
886 }
887}
888
889/// `SSE_CVTSS2SI` (CVTSS2SI).
890/// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
891///
892///
893/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SI.html).
894///
895/// Supported operand variants:
896///
897/// ```text
898/// +---+----------+
899/// | # | Operands |
900/// +---+----------+
901/// | 1 | Gpd, Mem |
902/// | 2 | Gpd, Xmm |
903/// | 3 | Gpq, Mem |
904/// | 4 | Gpq, Xmm |
905/// +---+----------+
906/// ```
907pub trait SseCvtss2siEmitter<A, B> {
908 fn sse_cvtss2si(&mut self, op0: A, op1: B);
909}
910
911impl<'a> SseCvtss2siEmitter<Gpd, Xmm> for Assembler<'a> {
912 fn sse_cvtss2si(&mut self, op0: Gpd, op1: Xmm) {
913 self.emit(SSE_CVTSS2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
914 }
915}
916
917impl<'a> SseCvtss2siEmitter<Gpd, Mem> for Assembler<'a> {
918 fn sse_cvtss2si(&mut self, op0: Gpd, op1: Mem) {
919 self.emit(SSE_CVTSS2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
920 }
921}
922
923impl<'a> SseCvtss2siEmitter<Gpq, Xmm> for Assembler<'a> {
924 fn sse_cvtss2si(&mut self, op0: Gpq, op1: Xmm) {
925 self.emit(SSE_CVTSS2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
926 }
927}
928
929impl<'a> SseCvtss2siEmitter<Gpq, Mem> for Assembler<'a> {
930 fn sse_cvtss2si(&mut self, op0: Gpq, op1: Mem) {
931 self.emit(SSE_CVTSS2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
932 }
933}
934
935/// `SSE_CVTTSS2SI` (CVTTSS2SI).
936/// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
937///
938///
939/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSS2SI.html).
940///
941/// Supported operand variants:
942///
943/// ```text
944/// +---+----------+
945/// | # | Operands |
946/// +---+----------+
947/// | 1 | Gpd, Mem |
948/// | 2 | Gpd, Xmm |
949/// | 3 | Gpq, Mem |
950/// | 4 | Gpq, Xmm |
951/// +---+----------+
952/// ```
953pub trait SseCvttss2siEmitter<A, B> {
954 fn sse_cvttss2si(&mut self, op0: A, op1: B);
955}
956
957impl<'a> SseCvttss2siEmitter<Gpd, Xmm> for Assembler<'a> {
958 fn sse_cvttss2si(&mut self, op0: Gpd, op1: Xmm) {
959 self.emit(SSE_CVTTSS2SI32RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
960 }
961}
962
963impl<'a> SseCvttss2siEmitter<Gpd, Mem> for Assembler<'a> {
964 fn sse_cvttss2si(&mut self, op0: Gpd, op1: Mem) {
965 self.emit(SSE_CVTTSS2SI32RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
966 }
967}
968
969impl<'a> SseCvttss2siEmitter<Gpq, Xmm> for Assembler<'a> {
970 fn sse_cvttss2si(&mut self, op0: Gpq, op1: Xmm) {
971 self.emit(SSE_CVTTSS2SI64RR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
972 }
973}
974
975impl<'a> SseCvttss2siEmitter<Gpq, Mem> for Assembler<'a> {
976 fn sse_cvttss2si(&mut self, op0: Gpq, op1: Mem) {
977 self.emit(SSE_CVTTSS2SI64RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
978 }
979}
980
981/// `SSE_DIVPS` (DIVPS).
982/// Performs a SIMD divide of the four, eight or sixteen packed single precision floating-point values in the first source operand (the second operand) by the four, eight or sixteen packed single precision floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
983///
984///
985/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPS.html).
986///
987/// Supported operand variants:
988///
989/// ```text
990/// +---+----------+
991/// | # | Operands |
992/// +---+----------+
993/// | 1 | Xmm, Mem |
994/// | 2 | Xmm, Xmm |
995/// +---+----------+
996/// ```
997pub trait SseDivpsEmitter<A, B> {
998 fn sse_divps(&mut self, op0: A, op1: B);
999}
1000
1001impl<'a> SseDivpsEmitter<Xmm, Xmm> for Assembler<'a> {
1002 fn sse_divps(&mut self, op0: Xmm, op1: Xmm) {
1003 self.emit(SSE_DIVPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1004 }
1005}
1006
1007impl<'a> SseDivpsEmitter<Xmm, Mem> for Assembler<'a> {
1008 fn sse_divps(&mut self, op0: Xmm, op1: Mem) {
1009 self.emit(SSE_DIVPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1010 }
1011}
1012
1013/// `SSE_DIVSS` (DIVSS).
1014/// Divides the low single precision floating-point value in the first source operand by the low single precision floating-point value in the second source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location.
1015///
1016///
1017/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSS.html).
1018///
1019/// Supported operand variants:
1020///
1021/// ```text
1022/// +---+----------+
1023/// | # | Operands |
1024/// +---+----------+
1025/// | 1 | Xmm, Mem |
1026/// | 2 | Xmm, Xmm |
1027/// +---+----------+
1028/// ```
1029pub trait SseDivssEmitter<A, B> {
1030 fn sse_divss(&mut self, op0: A, op1: B);
1031}
1032
1033impl<'a> SseDivssEmitter<Xmm, Xmm> for Assembler<'a> {
1034 fn sse_divss(&mut self, op0: Xmm, op1: Xmm) {
1035 self.emit(SSE_DIVSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1036 }
1037}
1038
1039impl<'a> SseDivssEmitter<Xmm, Mem> for Assembler<'a> {
1040 fn sse_divss(&mut self, op0: Xmm, op1: Mem) {
1041 self.emit(SSE_DIVSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1042 }
1043}
1044
1045/// `SSE_MAXPS` (MAXPS).
1046/// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
1047///
1048///
1049/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPS.html).
1050///
1051/// Supported operand variants:
1052///
1053/// ```text
1054/// +---+----------+
1055/// | # | Operands |
1056/// +---+----------+
1057/// | 1 | Xmm, Mem |
1058/// | 2 | Xmm, Xmm |
1059/// +---+----------+
1060/// ```
1061pub trait SseMaxpsEmitter<A, B> {
1062 fn sse_maxps(&mut self, op0: A, op1: B);
1063}
1064
1065impl<'a> SseMaxpsEmitter<Xmm, Xmm> for Assembler<'a> {
1066 fn sse_maxps(&mut self, op0: Xmm, op1: Xmm) {
1067 self.emit(SSE_MAXPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1068 }
1069}
1070
1071impl<'a> SseMaxpsEmitter<Xmm, Mem> for Assembler<'a> {
1072 fn sse_maxps(&mut self, op0: Xmm, op1: Mem) {
1073 self.emit(SSE_MAXPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1074 }
1075}
1076
1077/// `SSE_MAXSS` (MAXSS).
1078/// Compares the low single precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low doubleword of the destination operand.
1079///
1080///
1081/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSS.html).
1082///
1083/// Supported operand variants:
1084///
1085/// ```text
1086/// +---+----------+
1087/// | # | Operands |
1088/// +---+----------+
1089/// | 1 | Xmm, Mem |
1090/// | 2 | Xmm, Xmm |
1091/// +---+----------+
1092/// ```
1093pub trait SseMaxssEmitter<A, B> {
1094 fn sse_maxss(&mut self, op0: A, op1: B);
1095}
1096
1097impl<'a> SseMaxssEmitter<Xmm, Xmm> for Assembler<'a> {
1098 fn sse_maxss(&mut self, op0: Xmm, op1: Xmm) {
1099 self.emit(SSE_MAXSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1100 }
1101}
1102
1103impl<'a> SseMaxssEmitter<Xmm, Mem> for Assembler<'a> {
1104 fn sse_maxss(&mut self, op0: Xmm, op1: Mem) {
1105 self.emit(SSE_MAXSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1106 }
1107}
1108
1109/// `SSE_MINPS` (MINPS).
1110/// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
1111///
1112///
1113/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPS.html).
1114///
1115/// Supported operand variants:
1116///
1117/// ```text
1118/// +---+----------+
1119/// | # | Operands |
1120/// +---+----------+
1121/// | 1 | Xmm, Mem |
1122/// | 2 | Xmm, Xmm |
1123/// +---+----------+
1124/// ```
1125pub trait SseMinpsEmitter<A, B> {
1126 fn sse_minps(&mut self, op0: A, op1: B);
1127}
1128
1129impl<'a> SseMinpsEmitter<Xmm, Xmm> for Assembler<'a> {
1130 fn sse_minps(&mut self, op0: Xmm, op1: Xmm) {
1131 self.emit(SSE_MINPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1132 }
1133}
1134
1135impl<'a> SseMinpsEmitter<Xmm, Mem> for Assembler<'a> {
1136 fn sse_minps(&mut self, op0: Xmm, op1: Mem) {
1137 self.emit(SSE_MINPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1138 }
1139}
1140
1141/// `SSE_MINSS` (MINSS).
1142/// Compares the low single precision floating-point values in the first source operand and the second source operand and returns the minimum value to the low doubleword of the destination operand.
1143///
1144///
1145/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSS.html).
1146///
1147/// Supported operand variants:
1148///
1149/// ```text
1150/// +---+----------+
1151/// | # | Operands |
1152/// +---+----------+
1153/// | 1 | Xmm, Mem |
1154/// | 2 | Xmm, Xmm |
1155/// +---+----------+
1156/// ```
1157pub trait SseMinssEmitter<A, B> {
1158 fn sse_minss(&mut self, op0: A, op1: B);
1159}
1160
1161impl<'a> SseMinssEmitter<Xmm, Xmm> for Assembler<'a> {
1162 fn sse_minss(&mut self, op0: Xmm, op1: Xmm) {
1163 self.emit(SSE_MINSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1164 }
1165}
1166
1167impl<'a> SseMinssEmitter<Xmm, Mem> for Assembler<'a> {
1168 fn sse_minss(&mut self, op0: Xmm, op1: Mem) {
1169 self.emit(SSE_MINSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1170 }
1171}
1172
1173/// `SSE_MOVAPS` (MOVAPS).
1174/// Moves 4, 8 or 16 single precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from an 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
1175///
1176///
1177/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPS.html).
1178///
1179/// Supported operand variants:
1180///
1181/// ```text
1182/// +---+----------+
1183/// | # | Operands |
1184/// +---+----------+
1185/// | 1 | Mem, Xmm |
1186/// | 2 | Xmm, Mem |
1187/// | 3 | Xmm, Xmm |
1188/// +---+----------+
1189/// ```
1190pub trait SseMovapsEmitter<A, B> {
1191 fn sse_movaps(&mut self, op0: A, op1: B);
1192}
1193
1194impl<'a> SseMovapsEmitter<Xmm, Xmm> for Assembler<'a> {
1195 fn sse_movaps(&mut self, op0: Xmm, op1: Xmm) {
1196 self.emit(SSE_MOVAPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1197 }
1198}
1199
1200impl<'a> SseMovapsEmitter<Xmm, Mem> for Assembler<'a> {
1201 fn sse_movaps(&mut self, op0: Xmm, op1: Mem) {
1202 self.emit(SSE_MOVAPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1203 }
1204}
1205
1206impl<'a> SseMovapsEmitter<Mem, Xmm> for Assembler<'a> {
1207 fn sse_movaps(&mut self, op0: Mem, op1: Xmm) {
1208 self.emit(SSE_MOVAPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1209 }
1210}
1211
1212/// `SSE_MOVHLPS` (MOVHLPS).
1213/// This instruction cannot be used for memory to register moves.
1214///
1215///
1216/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHLPS.html).
1217///
1218/// Supported operand variants:
1219///
1220/// ```text
1221/// +---+----------+
1222/// | # | Operands |
1223/// +---+----------+
1224/// | 1 | Xmm, Xmm |
1225/// +---+----------+
1226/// ```
1227pub trait SseMovhlpsEmitter<A, B> {
1228 fn sse_movhlps(&mut self, op0: A, op1: B);
1229}
1230
1231impl<'a> SseMovhlpsEmitter<Xmm, Xmm> for Assembler<'a> {
1232 fn sse_movhlps(&mut self, op0: Xmm, op1: Xmm) {
1233 self.emit(SSE_MOVHLPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1234 }
1235}
1236
1237/// `SSE_MOVHPS` (MOVHPS).
1238/// This instruction cannot be used for register to register or memory to memory moves.
1239///
1240///
1241/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPS.html).
1242///
1243/// Supported operand variants:
1244///
1245/// ```text
1246/// +---+----------+
1247/// | # | Operands |
1248/// +---+----------+
1249/// | 1 | Mem, Xmm |
1250/// | 2 | Xmm, Mem |
1251/// +---+----------+
1252/// ```
1253pub trait SseMovhpsEmitter<A, B> {
1254 fn sse_movhps(&mut self, op0: A, op1: B);
1255}
1256
1257impl<'a> SseMovhpsEmitter<Xmm, Mem> for Assembler<'a> {
1258 fn sse_movhps(&mut self, op0: Xmm, op1: Mem) {
1259 self.emit(SSE_MOVHPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1260 }
1261}
1262
1263impl<'a> SseMovhpsEmitter<Mem, Xmm> for Assembler<'a> {
1264 fn sse_movhps(&mut self, op0: Mem, op1: Xmm) {
1265 self.emit(SSE_MOVHPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1266 }
1267}
1268
1269/// `SSE_MOVLHPS` (MOVLHPS).
1270/// This instruction cannot be used for memory to register moves.
1271///
1272///
1273/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLHPS.html).
1274///
1275/// Supported operand variants:
1276///
1277/// ```text
1278/// +---+----------+
1279/// | # | Operands |
1280/// +---+----------+
1281/// | 1 | Xmm, Xmm |
1282/// +---+----------+
1283/// ```
1284pub trait SseMovlhpsEmitter<A, B> {
1285 fn sse_movlhps(&mut self, op0: A, op1: B);
1286}
1287
1288impl<'a> SseMovlhpsEmitter<Xmm, Xmm> for Assembler<'a> {
1289 fn sse_movlhps(&mut self, op0: Xmm, op1: Xmm) {
1290 self.emit(SSE_MOVLHPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1291 }
1292}
1293
1294/// `SSE_MOVLPS` (MOVLPS).
1295/// This instruction cannot be used for register to register or memory to memory moves.
1296///
1297///
1298/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPS.html).
1299///
1300/// Supported operand variants:
1301///
1302/// ```text
1303/// +---+----------+
1304/// | # | Operands |
1305/// +---+----------+
1306/// | 1 | Mem, Xmm |
1307/// | 2 | Xmm, Mem |
1308/// +---+----------+
1309/// ```
1310pub trait SseMovlpsEmitter<A, B> {
1311 fn sse_movlps(&mut self, op0: A, op1: B);
1312}
1313
1314impl<'a> SseMovlpsEmitter<Xmm, Mem> for Assembler<'a> {
1315 fn sse_movlps(&mut self, op0: Xmm, op1: Mem) {
1316 self.emit(SSE_MOVLPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1317 }
1318}
1319
1320impl<'a> SseMovlpsEmitter<Mem, Xmm> for Assembler<'a> {
1321 fn sse_movlps(&mut self, op0: Mem, op1: Xmm) {
1322 self.emit(SSE_MOVLPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1323 }
1324}
1325
1326/// `SSE_MOVMSKPS` (MOVMSKPS).
1327/// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
1328///
1329///
1330/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
1331///
1332/// Supported operand variants:
1333///
1334/// ```text
1335/// +---+----------+
1336/// | # | Operands |
1337/// +---+----------+
1338/// | 1 | Gpq, Xmm |
1339/// +---+----------+
1340/// ```
1341pub trait SseMovmskpsEmitter<A, B> {
1342 fn sse_movmskps(&mut self, op0: A, op1: B);
1343}
1344
1345impl<'a> SseMovmskpsEmitter<Gpq, Xmm> for Assembler<'a> {
1346 fn sse_movmskps(&mut self, op0: Gpq, op1: Xmm) {
1347 self.emit(SSE_MOVMSKPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1348 }
1349}
1350
1351/// `SSE_MOVNTPS` (MOVNTPS).
1352/// Moves the packed single precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed single precision, floating-pointing. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.
1353///
1354///
1355/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPS.html).
1356///
1357/// Supported operand variants:
1358///
1359/// ```text
1360/// +---+----------+
1361/// | # | Operands |
1362/// +---+----------+
1363/// | 1 | Mem, Xmm |
1364/// +---+----------+
1365/// ```
1366pub trait SseMovntpsEmitter<A, B> {
1367 fn sse_movntps(&mut self, op0: A, op1: B);
1368}
1369
1370impl<'a> SseMovntpsEmitter<Mem, Xmm> for Assembler<'a> {
1371 fn sse_movntps(&mut self, op0: Mem, op1: Xmm) {
1372 self.emit(SSE_MOVNTPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1373 }
1374}
1375
1376/// `SSE_MOVNTSS`.
1377///
1378/// Supported operand variants:
1379///
1380/// ```text
1381/// +---+----------+
1382/// | # | Operands |
1383/// +---+----------+
1384/// | 1 | Mem, Xmm |
1385/// +---+----------+
1386/// ```
1387pub trait SseMovntssEmitter<A, B> {
1388 fn sse_movntss(&mut self, op0: A, op1: B);
1389}
1390
1391impl<'a> SseMovntssEmitter<Mem, Xmm> for Assembler<'a> {
1392 fn sse_movntss(&mut self, op0: Mem, op1: Xmm) {
1393 self.emit(SSE_MOVNTSSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1394 }
1395}
1396
1397/// `SSE_MOVSS` (MOVSS).
1398/// Moves a scalar single precision floating-point value from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 32-bit memory locations. This instruction can be used to move a single precision floating-point value to and from the low doubleword of an XMM register and a 32-bit memory location, or to move a single precision floating-point value between the low doublewords of two XMM registers. The instruction cannot be used to transfer data between memory locations.
1399///
1400///
1401/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVSS.html).
1402///
1403/// Supported operand variants:
1404///
1405/// ```text
1406/// +---+----------+
1407/// | # | Operands |
1408/// +---+----------+
1409/// | 1 | Mem, Xmm |
1410/// | 2 | Xmm, Mem |
1411/// | 3 | Xmm, Xmm |
1412/// +---+----------+
1413/// ```
1414pub trait SseMovssEmitter<A, B> {
1415 fn sse_movss(&mut self, op0: A, op1: B);
1416}
1417
1418impl<'a> SseMovssEmitter<Xmm, Xmm> for Assembler<'a> {
1419 fn sse_movss(&mut self, op0: Xmm, op1: Xmm) {
1420 self.emit(SSE_MOVSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1421 }
1422}
1423
1424impl<'a> SseMovssEmitter<Xmm, Mem> for Assembler<'a> {
1425 fn sse_movss(&mut self, op0: Xmm, op1: Mem) {
1426 self.emit(SSE_MOVSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1427 }
1428}
1429
1430impl<'a> SseMovssEmitter<Mem, Xmm> for Assembler<'a> {
1431 fn sse_movss(&mut self, op0: Mem, op1: Xmm) {
1432 self.emit(SSE_MOVSSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1433 }
1434}
1435
1436/// `SSE_MOVUPS` (MOVUPS).
1437/// Note: VEX.vvvv and EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
1438///
1439///
1440/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPS.html).
1441///
1442/// Supported operand variants:
1443///
1444/// ```text
1445/// +---+----------+
1446/// | # | Operands |
1447/// +---+----------+
1448/// | 1 | Mem, Xmm |
1449/// | 2 | Xmm, Mem |
1450/// | 3 | Xmm, Xmm |
1451/// +---+----------+
1452/// ```
1453pub trait SseMovupsEmitter<A, B> {
1454 fn sse_movups(&mut self, op0: A, op1: B);
1455}
1456
1457impl<'a> SseMovupsEmitter<Xmm, Xmm> for Assembler<'a> {
1458 fn sse_movups(&mut self, op0: Xmm, op1: Xmm) {
1459 self.emit(SSE_MOVUPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1460 }
1461}
1462
1463impl<'a> SseMovupsEmitter<Xmm, Mem> for Assembler<'a> {
1464 fn sse_movups(&mut self, op0: Xmm, op1: Mem) {
1465 self.emit(SSE_MOVUPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1466 }
1467}
1468
1469impl<'a> SseMovupsEmitter<Mem, Xmm> for Assembler<'a> {
1470 fn sse_movups(&mut self, op0: Mem, op1: Xmm) {
1471 self.emit(SSE_MOVUPSMR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1472 }
1473}
1474
1475/// `SSE_MULPS` (MULPS).
1476/// Multiply the packed single precision floating-point values from the first source operand with the corresponding values in the second source operand, and stores the packed double precision floating-point results in the destination operand.
1477///
1478///
1479/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPS.html).
1480///
1481/// Supported operand variants:
1482///
1483/// ```text
1484/// +---+----------+
1485/// | # | Operands |
1486/// +---+----------+
1487/// | 1 | Xmm, Mem |
1488/// | 2 | Xmm, Xmm |
1489/// +---+----------+
1490/// ```
1491pub trait SseMulpsEmitter<A, B> {
1492 fn sse_mulps(&mut self, op0: A, op1: B);
1493}
1494
1495impl<'a> SseMulpsEmitter<Xmm, Xmm> for Assembler<'a> {
1496 fn sse_mulps(&mut self, op0: Xmm, op1: Xmm) {
1497 self.emit(SSE_MULPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1498 }
1499}
1500
1501impl<'a> SseMulpsEmitter<Xmm, Mem> for Assembler<'a> {
1502 fn sse_mulps(&mut self, op0: Xmm, op1: Mem) {
1503 self.emit(SSE_MULPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1504 }
1505}
1506
1507/// `SSE_MULSS` (MULSS).
1508/// Multiplies the low single precision floating-point value from the second source operand by the low single precision floating-point value in the first source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source operand and the destination operands are XMM registers.
1509///
1510///
1511/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSS.html).
1512///
1513/// Supported operand variants:
1514///
1515/// ```text
1516/// +---+----------+
1517/// | # | Operands |
1518/// +---+----------+
1519/// | 1 | Xmm, Mem |
1520/// | 2 | Xmm, Xmm |
1521/// +---+----------+
1522/// ```
1523pub trait SseMulssEmitter<A, B> {
1524 fn sse_mulss(&mut self, op0: A, op1: B);
1525}
1526
1527impl<'a> SseMulssEmitter<Xmm, Xmm> for Assembler<'a> {
1528 fn sse_mulss(&mut self, op0: Xmm, op1: Xmm) {
1529 self.emit(SSE_MULSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1530 }
1531}
1532
1533impl<'a> SseMulssEmitter<Xmm, Mem> for Assembler<'a> {
1534 fn sse_mulss(&mut self, op0: Xmm, op1: Mem) {
1535 self.emit(SSE_MULSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1536 }
1537}
1538
1539/// `SSE_ORPS` (ORPS).
1540/// Performs a bitwise logical OR of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
1541///
1542///
1543/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPS.html).
1544///
1545/// Supported operand variants:
1546///
1547/// ```text
1548/// +---+----------+
1549/// | # | Operands |
1550/// +---+----------+
1551/// | 1 | Xmm, Mem |
1552/// | 2 | Xmm, Xmm |
1553/// +---+----------+
1554/// ```
1555pub trait SseOrpsEmitter<A, B> {
1556 fn sse_orps(&mut self, op0: A, op1: B);
1557}
1558
1559impl<'a> SseOrpsEmitter<Xmm, Xmm> for Assembler<'a> {
1560 fn sse_orps(&mut self, op0: Xmm, op1: Xmm) {
1561 self.emit(SSE_ORPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1562 }
1563}
1564
1565impl<'a> SseOrpsEmitter<Xmm, Mem> for Assembler<'a> {
1566 fn sse_orps(&mut self, op0: Xmm, op1: Mem) {
1567 self.emit(SSE_ORPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1568 }
1569}
1570
1571/// `SSE_RCPPS` (RCPPS).
1572/// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
1573///
1574///
1575/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
1576///
1577/// Supported operand variants:
1578///
1579/// ```text
1580/// +---+----------+
1581/// | # | Operands |
1582/// +---+----------+
1583/// | 1 | Xmm, Mem |
1584/// | 2 | Xmm, Xmm |
1585/// +---+----------+
1586/// ```
1587pub trait SseRcppsEmitter<A, B> {
1588 fn sse_rcpps(&mut self, op0: A, op1: B);
1589}
1590
1591impl<'a> SseRcppsEmitter<Xmm, Xmm> for Assembler<'a> {
1592 fn sse_rcpps(&mut self, op0: Xmm, op1: Xmm) {
1593 self.emit(SSE_RCPPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1594 }
1595}
1596
1597impl<'a> SseRcppsEmitter<Xmm, Mem> for Assembler<'a> {
1598 fn sse_rcpps(&mut self, op0: Xmm, op1: Mem) {
1599 self.emit(SSE_RCPPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1600 }
1601}
1602
1603/// `SSE_RCPSS` (RCPSS).
1604/// Computes of an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
1605///
1606///
1607/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
1608///
1609/// Supported operand variants:
1610///
1611/// ```text
1612/// +---+----------+
1613/// | # | Operands |
1614/// +---+----------+
1615/// | 1 | Xmm, Mem |
1616/// | 2 | Xmm, Xmm |
1617/// +---+----------+
1618/// ```
1619pub trait SseRcpssEmitter<A, B> {
1620 fn sse_rcpss(&mut self, op0: A, op1: B);
1621}
1622
1623impl<'a> SseRcpssEmitter<Xmm, Xmm> for Assembler<'a> {
1624 fn sse_rcpss(&mut self, op0: Xmm, op1: Xmm) {
1625 self.emit(SSE_RCPSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1626 }
1627}
1628
1629impl<'a> SseRcpssEmitter<Xmm, Mem> for Assembler<'a> {
1630 fn sse_rcpss(&mut self, op0: Xmm, op1: Mem) {
1631 self.emit(SSE_RCPSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1632 }
1633}
1634
1635/// `SSE_RSQRTPS` (RSQRTPS).
1636/// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
1637///
1638///
1639/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
1640///
1641/// Supported operand variants:
1642///
1643/// ```text
1644/// +---+----------+
1645/// | # | Operands |
1646/// +---+----------+
1647/// | 1 | Xmm, Mem |
1648/// | 2 | Xmm, Xmm |
1649/// +---+----------+
1650/// ```
1651pub trait SseRsqrtpsEmitter<A, B> {
1652 fn sse_rsqrtps(&mut self, op0: A, op1: B);
1653}
1654
1655impl<'a> SseRsqrtpsEmitter<Xmm, Xmm> for Assembler<'a> {
1656 fn sse_rsqrtps(&mut self, op0: Xmm, op1: Xmm) {
1657 self.emit(SSE_RSQRTPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1658 }
1659}
1660
1661impl<'a> SseRsqrtpsEmitter<Xmm, Mem> for Assembler<'a> {
1662 fn sse_rsqrtps(&mut self, op0: Xmm, op1: Mem) {
1663 self.emit(SSE_RSQRTPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1664 }
1665}
1666
1667/// `SSE_RSQRTSS` (RSQRTSS).
1668/// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
1669///
1670///
1671/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
1672///
1673/// Supported operand variants:
1674///
1675/// ```text
1676/// +---+----------+
1677/// | # | Operands |
1678/// +---+----------+
1679/// | 1 | Xmm, Mem |
1680/// | 2 | Xmm, Xmm |
1681/// +---+----------+
1682/// ```
1683pub trait SseRsqrtssEmitter<A, B> {
1684 fn sse_rsqrtss(&mut self, op0: A, op1: B);
1685}
1686
1687impl<'a> SseRsqrtssEmitter<Xmm, Xmm> for Assembler<'a> {
1688 fn sse_rsqrtss(&mut self, op0: Xmm, op1: Xmm) {
1689 self.emit(SSE_RSQRTSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1690 }
1691}
1692
1693impl<'a> SseRsqrtssEmitter<Xmm, Mem> for Assembler<'a> {
1694 fn sse_rsqrtss(&mut self, op0: Xmm, op1: Mem) {
1695 self.emit(SSE_RSQRTSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1696 }
1697}
1698
/// `SSE_SHUFPS` (SHUFPS).
/// Shuffles single precision floating-point values: every two-bit field of the imm8 byte, counted
/// from bit 0, picks one value out of an input quadruplet and moves it to the matching element of
/// the destination operand. Within a 128-bit lane, the two lower destination elements are chosen
/// from the quadruplet of the first source operand and the next two elements from the quadruplet
/// of the second source operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SHUFPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Imm |
/// +---+---------------+
/// ```
pub trait SseShufpsEmitter<A, B, C> {
    /// Emits `SHUFPS` for the operand triple (`op0`, `op1`, `op2`); the accepted combinations
    /// are the implementations of this trait.
    fn sse_shufps(&mut self, op0: A, op1: B, op2: C);
}
1718
1719impl<'a> SseShufpsEmitter<Xmm, Xmm, Imm> for Assembler<'a> {
1720 fn sse_shufps(&mut self, op0: Xmm, op1: Xmm, op2: Imm) {
1721 self.emit(SSE_SHUFPSRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1722 }
1723}
1724
1725impl<'a> SseShufpsEmitter<Xmm, Mem, Imm> for Assembler<'a> {
1726 fn sse_shufps(&mut self, op0: Xmm, op1: Mem, op2: Imm) {
1727 self.emit(SSE_SHUFPSRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
1728 }
1729}
1730
/// `SSE_SQRTPS` (SQRTPS).
/// Performs a SIMD computation of the square roots of the packed single precision floating-point
/// values in the source operand (second operand) and stores the packed single precision
/// floating-point results in the destination operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtpsEmitter<A, B> {
    /// Emits `SQRTPS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_sqrtps(&mut self, op0: A, op1: B);
}
1750
1751impl<'a> SseSqrtpsEmitter<Xmm, Xmm> for Assembler<'a> {
1752 fn sse_sqrtps(&mut self, op0: Xmm, op1: Xmm) {
1753 self.emit(SSE_SQRTPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1754 }
1755}
1756
1757impl<'a> SseSqrtpsEmitter<Xmm, Mem> for Assembler<'a> {
1758 fn sse_sqrtps(&mut self, op0: Xmm, op1: Mem) {
1759 self.emit(SSE_SQRTPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1760 }
1761}
1762
/// `SSE_SQRTSS` (SQRTSS).
/// Computes the square root of the low single precision floating-point value in the second source
/// operand and stores the single precision floating-point result in the destination operand. The
/// second source operand can be an XMM register or a 32-bit memory location. The first source and
/// destination operand is an XMM register.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTSS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSqrtssEmitter<A, B> {
    /// Emits `SQRTSS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_sqrtss(&mut self, op0: A, op1: B);
}
1782
1783impl<'a> SseSqrtssEmitter<Xmm, Xmm> for Assembler<'a> {
1784 fn sse_sqrtss(&mut self, op0: Xmm, op1: Xmm) {
1785 self.emit(SSE_SQRTSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1786 }
1787}
1788
1789impl<'a> SseSqrtssEmitter<Xmm, Mem> for Assembler<'a> {
1790 fn sse_sqrtss(&mut self, op0: Xmm, op1: Mem) {
1791 self.emit(SSE_SQRTSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1792 }
1793}
1794
/// `SSE_SUBPS` (SUBPS).
/// Performs a SIMD subtract of the packed single precision floating-point values in the second
/// source operand from those in the first source operand, and stores the packed single precision
/// floating-point results in the destination operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubpsEmitter<A, B> {
    /// Emits `SUBPS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_subps(&mut self, op0: A, op1: B);
}
1814
1815impl<'a> SseSubpsEmitter<Xmm, Xmm> for Assembler<'a> {
1816 fn sse_subps(&mut self, op0: Xmm, op1: Xmm) {
1817 self.emit(SSE_SUBPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1818 }
1819}
1820
1821impl<'a> SseSubpsEmitter<Xmm, Mem> for Assembler<'a> {
1822 fn sse_subps(&mut self, op0: Xmm, op1: Mem) {
1823 self.emit(SSE_SUBPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1824 }
1825}
1826
/// `SSE_SUBSS` (SUBSS).
/// Subtracts the low single precision floating-point value in the second source operand from the
/// low single precision floating-point value in the first source operand and stores the single
/// precision floating-point result in the low doubleword of the destination operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBSS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseSubssEmitter<A, B> {
    /// Emits `SUBSS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_subss(&mut self, op0: A, op1: B);
}
1846
1847impl<'a> SseSubssEmitter<Xmm, Xmm> for Assembler<'a> {
1848 fn sse_subss(&mut self, op0: Xmm, op1: Xmm) {
1849 self.emit(SSE_SUBSSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1850 }
1851}
1852
1853impl<'a> SseSubssEmitter<Xmm, Mem> for Assembler<'a> {
1854 fn sse_subss(&mut self, op0: Xmm, op1: Mem) {
1855 self.emit(SSE_SUBSSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1856 }
1857}
1858
/// `SSE_UCOMISS` (UCOMISS).
/// Compares the single precision floating-point values in the low doublewords of operand 1 (first
/// operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS
/// register according to the result (unordered, greater than, less than, or equal). The OF, SF,
/// and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either
/// source operand is a NaN (QNaN or SNaN).
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UCOMISS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUcomissEmitter<A, B> {
    /// Emits `UCOMISS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_ucomiss(&mut self, op0: A, op1: B);
}
1878
1879impl<'a> SseUcomissEmitter<Xmm, Xmm> for Assembler<'a> {
1880 fn sse_ucomiss(&mut self, op0: Xmm, op1: Xmm) {
1881 self.emit(SSE_UCOMISSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1882 }
1883}
1884
1885impl<'a> SseUcomissEmitter<Xmm, Mem> for Assembler<'a> {
1886 fn sse_ucomiss(&mut self, op0: Xmm, op1: Mem) {
1887 self.emit(SSE_UCOMISSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1888 }
1889}
1890
/// `SSE_UNPCKHPS` (UNPCKHPS).
/// Performs an interleaved unpack of the high single precision floating-point values from the
/// first source operand and the second source operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpckhpsEmitter<A, B> {
    /// Emits `UNPCKHPS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_unpckhps(&mut self, op0: A, op1: B);
}
1910
1911impl<'a> SseUnpckhpsEmitter<Xmm, Xmm> for Assembler<'a> {
1912 fn sse_unpckhps(&mut self, op0: Xmm, op1: Xmm) {
1913 self.emit(SSE_UNPCKHPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1914 }
1915}
1916
1917impl<'a> SseUnpckhpsEmitter<Xmm, Mem> for Assembler<'a> {
1918 fn sse_unpckhps(&mut self, op0: Xmm, op1: Mem) {
1919 self.emit(SSE_UNPCKHPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1920 }
1921}
1922
/// `SSE_UNPCKLPS` (UNPCKLPS).
/// Performs an interleaved unpack of the low single precision floating-point values from the
/// first source operand and the second source operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseUnpcklpsEmitter<A, B> {
    /// Emits `UNPCKLPS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_unpcklps(&mut self, op0: A, op1: B);
}
1942
1943impl<'a> SseUnpcklpsEmitter<Xmm, Xmm> for Assembler<'a> {
1944 fn sse_unpcklps(&mut self, op0: Xmm, op1: Xmm) {
1945 self.emit(SSE_UNPCKLPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1946 }
1947}
1948
1949impl<'a> SseUnpcklpsEmitter<Xmm, Mem> for Assembler<'a> {
1950 fn sse_unpcklps(&mut self, op0: Xmm, op1: Mem) {
1951 self.emit(SSE_UNPCKLPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1952 }
1953}
1954
/// `SSE_XORPS` (XORPS).
/// Performs a bitwise logical XOR of the packed single-precision floating-point values from the
/// first source operand and the second source operand, and stores the result in the destination
/// operand.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/XORPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Xmm, Mem |
/// | 2 | Xmm, Xmm |
/// +---+----------+
/// ```
pub trait SseXorpsEmitter<A, B> {
    /// Emits `XORPS` for the operand pair (`op0`, `op1`); the accepted pairings are the
    /// implementations of this trait.
    fn sse_xorps(&mut self, op0: A, op1: B);
}
1974
1975impl<'a> SseXorpsEmitter<Xmm, Xmm> for Assembler<'a> {
1976 fn sse_xorps(&mut self, op0: Xmm, op1: Xmm) {
1977 self.emit(SSE_XORPSRR, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1978 }
1979}
1980
1981impl<'a> SseXorpsEmitter<Xmm, Mem> for Assembler<'a> {
1982 fn sse_xorps(&mut self, op0: Xmm, op1: Mem) {
1983 self.emit(SSE_XORPSRM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
1984 }
1985}
1986
/// `STMXCSR` (STMXCSR).
/// Stores the contents of the MXCSR control and status register to the destination operand, which
/// is a 32-bit memory location. The reserved bits in the MXCSR register are stored as 0s.
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Mem      |
/// +---+----------+
/// ```
pub trait StmxcsrEmitter<A> {
    /// Emits `STMXCSR` for the operand `op0`; the accepted operand types are the
    /// implementations of this trait.
    fn stmxcsr(&mut self, op0: A);
}
2005
2006impl<'a> StmxcsrEmitter<Mem> for Assembler<'a> {
2007 fn stmxcsr(&mut self, op0: Mem) {
2008 self.emit(STMXCSRM, op0.as_operand(), &NOREG, &NOREG, &NOREG);
2009 }
2010}
2011
2012
2013impl<'a> Assembler<'a> {
2014 /// `LDMXCSR` (LDMXCSR).
2015 /// Loads the source operand into the MXCSR control/status register. The source operand is a 32-bit memory location. See “MXCSR Control and Status Register” in Chapter 10, of the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for a description of the MXCSR register and its contents.
2016 ///
2017 ///
2018 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/LDMXCSR.html).
2019 ///
2020 /// Supported operand variants:
2021 ///
2022 /// ```text
2023 /// +---+----------+
2024 /// | # | Operands |
2025 /// +---+----------+
2026 /// | 1 | Mem |
2027 /// +---+----------+
2028 /// ```
2029 #[inline]
2030 pub fn ldmxcsr<A>(&mut self, op0: A)
2031 where Assembler<'a>: LdmxcsrEmitter<A> {
2032 <Self as LdmxcsrEmitter<A>>::ldmxcsr(self, op0);
2033 }
2034 /// `MMX_MASKMOVQ` (MASKMOVQ).
2035 /// Stores selected bytes from the source operand (first operand) into a 64-bit memory location. The mask operand (second operand) selects which bytes from the source operand are written to memory. The source and mask operands are MMX technology registers. The memory location specified by the effective address in the DI/EDI/RDI register (the default segment register is DS, but this may be overridden with a segment-override prefix). The memory location does not need to be aligned on a natural boundary. (The size of the store address depends on the address-size attribute.)
2036 ///
2037 ///
2038 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MASKMOVQ.html).
2039 ///
2040 /// Supported operand variants:
2041 ///
2042 /// ```text
2043 /// +---+----------+
2044 /// | # | Operands |
2045 /// +---+----------+
2046 /// | 1 | Mm, Mm |
2047 /// +---+----------+
2048 /// ```
2049 #[inline]
2050 pub fn mmx_maskmovq<A, B>(&mut self, op0: A, op1: B)
2051 where Assembler<'a>: MmxMaskmovqEmitter<A, B> {
2052 <Self as MmxMaskmovqEmitter<A, B>>::mmx_maskmovq(self, op0, op1);
2053 }
2054 /// `MMX_MOVDQ2Q`.
2055 ///
2056 /// Supported operand variants:
2057 ///
2058 /// ```text
2059 /// +---+----------+
2060 /// | # | Operands |
2061 /// +---+----------+
2062 /// | 1 | Mm, Xmm |
2063 /// +---+----------+
2064 /// ```
2065 #[inline]
2066 pub fn mmx_movdq2q<A, B>(&mut self, op0: A, op1: B)
2067 where Assembler<'a>: MmxMovdq2qEmitter<A, B> {
2068 <Self as MmxMovdq2qEmitter<A, B>>::mmx_movdq2q(self, op0, op1);
2069 }
2070 /// `MMX_MOVNTQ` (MOVNTQ).
2071 /// Moves the quadword in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to minimize cache pollution during the write to memory. The source operand is an MMX technology register, which is assumed to contain packed integer data (packed bytes, words, or doublewords). The destination operand is a 64-bit memory location.
2072 ///
2073 ///
2074 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTQ.html).
2075 ///
2076 /// Supported operand variants:
2077 ///
2078 /// ```text
2079 /// +---+----------+
2080 /// | # | Operands |
2081 /// +---+----------+
2082 /// | 1 | Mem, Mm |
2083 /// +---+----------+
2084 /// ```
2085 #[inline]
2086 pub fn mmx_movntq<A, B>(&mut self, op0: A, op1: B)
2087 where Assembler<'a>: MmxMovntqEmitter<A, B> {
2088 <Self as MmxMovntqEmitter<A, B>>::mmx_movntq(self, op0, op1);
2089 }
2090 /// `MMX_MOVQ2DQ` (MOVQ2DQ).
2091 /// Moves the quadword from the source operand (second operand) to the low quadword of the destination operand (first operand). The source operand is an MMX technology register and the destination operand is an XMM register.
2092 ///
2093 ///
2094 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVQ2DQ.html).
2095 ///
2096 /// Supported operand variants:
2097 ///
2098 /// ```text
2099 /// +---+----------+
2100 /// | # | Operands |
2101 /// +---+----------+
2102 /// | 1 | Xmm, Mm |
2103 /// +---+----------+
2104 /// ```
2105 #[inline]
2106 pub fn mmx_movq2dq<A, B>(&mut self, op0: A, op1: B)
2107 where Assembler<'a>: MmxMovq2dqEmitter<A, B> {
2108 <Self as MmxMovq2dqEmitter<A, B>>::mmx_movq2dq(self, op0, op1);
2109 }
2110 /// `MMX_PAVGB` (PAVGB).
2111 /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2112 ///
2113 ///
2114 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2115 ///
2116 /// Supported operand variants:
2117 ///
2118 /// ```text
2119 /// +---+----------+
2120 /// | # | Operands |
2121 /// +---+----------+
2122 /// | 1 | Mm, Mem |
2123 /// | 2 | Mm, Mm |
2124 /// +---+----------+
2125 /// ```
2126 #[inline]
2127 pub fn mmx_pavgb<A, B>(&mut self, op0: A, op1: B)
2128 where Assembler<'a>: MmxPavgbEmitter<A, B> {
2129 <Self as MmxPavgbEmitter<A, B>>::mmx_pavgb(self, op0, op1);
2130 }
2131 /// `MMX_PAVGW` (PAVGW).
2132 /// Performs a SIMD average of the packed unsigned integers from the source operand (second operand) and the destination operand (first operand), and stores the results in the destination operand. For each corresponding pair of data elements in the first and second operands, the elements are added together, a 1 is added to the temporary sum, and that result is shifted right one bit position.
2133 ///
2134 ///
2135 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PAVGB%3APAVGW.html).
2136 ///
2137 /// Supported operand variants:
2138 ///
2139 /// ```text
2140 /// +---+----------+
2141 /// | # | Operands |
2142 /// +---+----------+
2143 /// | 1 | Mm, Mem |
2144 /// | 2 | Mm, Mm |
2145 /// +---+----------+
2146 /// ```
2147 #[inline]
2148 pub fn mmx_pavgw<A, B>(&mut self, op0: A, op1: B)
2149 where Assembler<'a>: MmxPavgwEmitter<A, B> {
2150 <Self as MmxPavgwEmitter<A, B>>::mmx_pavgw(self, op0, op1);
2151 }
2152 /// `MMX_PEXTRW` (PEXTRW).
2153 /// Copies the word in the source operand (second operand) specified by the count operand (third operand) to the destination operand (first operand). The source operand can be an MMX technology register or an XMM register. The destination operand can be the low word of a general-purpose register or a 16-bit memory address. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location. The content of the destination register above bit 16 is cleared (set to all 0s).
2154 ///
2155 ///
2156 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PEXTRW.html).
2157 ///
2158 /// Supported operand variants:
2159 ///
2160 /// ```text
2161 /// +---+--------------+
2162 /// | # | Operands |
2163 /// +---+--------------+
2164 /// | 1 | Gpq, Mm, Imm |
2165 /// +---+--------------+
2166 /// ```
2167 #[inline]
2168 pub fn mmx_pextrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2169 where Assembler<'a>: MmxPextrwEmitter<A, B, C> {
2170 <Self as MmxPextrwEmitter<A, B, C>>::mmx_pextrw(self, op0, op1, op2);
2171 }
2172 /// `MMX_PINSRW` (PINSRW).
2173 /// Three operand MMX and SSE instructions
2174 ///
2175 ///
2176 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PINSRW.html).
2177 ///
2178 /// Supported operand variants:
2179 ///
2180 /// ```text
2181 /// +---+--------------+
2182 /// | # | Operands |
2183 /// +---+--------------+
2184 /// | 1 | Mm, Gpd, Imm |
2185 /// | 2 | Mm, Mem, Imm |
2186 /// +---+--------------+
2187 /// ```
2188 #[inline]
2189 pub fn mmx_pinsrw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2190 where Assembler<'a>: MmxPinsrwEmitter<A, B, C> {
2191 <Self as MmxPinsrwEmitter<A, B, C>>::mmx_pinsrw(self, op0, op1, op2);
2192 }
2193 /// `MMX_PMAXSW` (PMAXSW).
2194 /// Performs a SIMD compare of the packed signed byte, word, dword or qword integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2195 ///
2196 ///
2197 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXSB%3APMAXSW%3APMAXSD%3APMAXSQ.html).
2198 ///
2199 /// Supported operand variants:
2200 ///
2201 /// ```text
2202 /// +---+----------+
2203 /// | # | Operands |
2204 /// +---+----------+
2205 /// | 1 | Mm, Mem |
2206 /// | 2 | Mm, Mm |
2207 /// +---+----------+
2208 /// ```
2209 #[inline]
2210 pub fn mmx_pmaxsw<A, B>(&mut self, op0: A, op1: B)
2211 where Assembler<'a>: MmxPmaxswEmitter<A, B> {
2212 <Self as MmxPmaxswEmitter<A, B>>::mmx_pmaxsw(self, op0, op1);
2213 }
2214 /// `MMX_PMAXUB` (PMAXUB).
2215 /// Performs a SIMD compare of the packed unsigned byte, word integers in the second source operand and the first source operand and returns the maximum value for each pair of integers to the destination operand.
2216 ///
2217 ///
2218 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMAXUB%3APMAXUW.html).
2219 ///
2220 /// Supported operand variants:
2221 ///
2222 /// ```text
2223 /// +---+----------+
2224 /// | # | Operands |
2225 /// +---+----------+
2226 /// | 1 | Mm, Mem |
2227 /// | 2 | Mm, Mm |
2228 /// +---+----------+
2229 /// ```
2230 #[inline]
2231 pub fn mmx_pmaxub<A, B>(&mut self, op0: A, op1: B)
2232 where Assembler<'a>: MmxPmaxubEmitter<A, B> {
2233 <Self as MmxPmaxubEmitter<A, B>>::mmx_pmaxub(self, op0, op1);
2234 }
2235 /// `MMX_PMINSW` (PMINSW).
2236 /// Performs a SIMD compare of the packed signed byte, word, or dword integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2237 ///
2238 ///
2239 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINSB%3APMINSW.html).
2240 ///
2241 /// Supported operand variants:
2242 ///
2243 /// ```text
2244 /// +---+----------+
2245 /// | # | Operands |
2246 /// +---+----------+
2247 /// | 1 | Mm, Mem |
2248 /// | 2 | Mm, Mm |
2249 /// +---+----------+
2250 /// ```
2251 #[inline]
2252 pub fn mmx_pminsw<A, B>(&mut self, op0: A, op1: B)
2253 where Assembler<'a>: MmxPminswEmitter<A, B> {
2254 <Self as MmxPminswEmitter<A, B>>::mmx_pminsw(self, op0, op1);
2255 }
2256 /// `MMX_PMINUB` (PMINUB).
2257 /// Performs a SIMD compare of the packed unsigned byte or word integers in the second source operand and the first source operand and returns the minimum value for each pair of integers to the destination operand.
2258 ///
2259 ///
2260 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMINUB%3APMINUW.html).
2261 ///
2262 /// Supported operand variants:
2263 ///
2264 /// ```text
2265 /// +---+----------+
2266 /// | # | Operands |
2267 /// +---+----------+
2268 /// | 1 | Mm, Mem |
2269 /// | 2 | Mm, Mm |
2270 /// +---+----------+
2271 /// ```
2272 #[inline]
2273 pub fn mmx_pminub<A, B>(&mut self, op0: A, op1: B)
2274 where Assembler<'a>: MmxPminubEmitter<A, B> {
2275 <Self as MmxPminubEmitter<A, B>>::mmx_pminub(self, op0, op1);
2276 }
2277 /// `MMX_PMOVMSKB` (PMOVMSKB).
2278 /// Creates a mask made up of the most significant bit of each byte of the source operand (second operand) and stores the result in the low byte or word of the destination operand (first operand).
2279 ///
2280 ///
2281 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMOVMSKB.html).
2282 ///
2283 /// Supported operand variants:
2284 ///
2285 /// ```text
2286 /// +---+----------+
2287 /// | # | Operands |
2288 /// +---+----------+
2289 /// | 1 | Gpq, Mm |
2290 /// +---+----------+
2291 /// ```
2292 #[inline]
2293 pub fn mmx_pmovmskb<A, B>(&mut self, op0: A, op1: B)
2294 where Assembler<'a>: MmxPmovmskbEmitter<A, B> {
2295 <Self as MmxPmovmskbEmitter<A, B>>::mmx_pmovmskb(self, op0, op1);
2296 }
2297 /// `MMX_PMULHUW` (PMULHUW).
2298 /// Performs a SIMD unsigned multiply of the packed unsigned word integers in the destination operand (first operand) and the source operand (second operand), and stores the high 16 bits of each 32-bit intermediate results in the destination operand. (Figure 4-12 shows this operation when using 64-bit operands.)
2299 ///
2300 ///
2301 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PMULHUW.html).
2302 ///
2303 /// Supported operand variants:
2304 ///
2305 /// ```text
2306 /// +---+----------+
2307 /// | # | Operands |
2308 /// +---+----------+
2309 /// | 1 | Mm, Mem |
2310 /// | 2 | Mm, Mm |
2311 /// +---+----------+
2312 /// ```
2313 #[inline]
2314 pub fn mmx_pmulhuw<A, B>(&mut self, op0: A, op1: B)
2315 where Assembler<'a>: MmxPmulhuwEmitter<A, B> {
2316 <Self as MmxPmulhuwEmitter<A, B>>::mmx_pmulhuw(self, op0, op1);
2317 }
2318 /// `MMX_PSADBW` (PSADBW).
2319 /// Computes the absolute value of the difference of 8 unsigned byte integers from the source operand (second operand) and from the destination operand (first operand). These 8 differences are then summed to produce an unsigned word integer result that is stored in the destination operand. Figure 4-14 shows the operation of the PSADBW instruction when using 64-bit operands.
2320 ///
2321 ///
2322 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSADBW.html).
2323 ///
2324 /// Supported operand variants:
2325 ///
2326 /// ```text
2327 /// +---+----------+
2328 /// | # | Operands |
2329 /// +---+----------+
2330 /// | 1 | Mm, Mem |
2331 /// | 2 | Mm, Mm |
2332 /// +---+----------+
2333 /// ```
2334 #[inline]
2335 pub fn mmx_psadbw<A, B>(&mut self, op0: A, op1: B)
2336 where Assembler<'a>: MmxPsadbwEmitter<A, B> {
2337 <Self as MmxPsadbwEmitter<A, B>>::mmx_psadbw(self, op0, op1);
2338 }
2339 /// `MMX_PSHUFW` (PSHUFW).
2340 /// Copies words from the source operand (second operand) and inserts them in the destination operand (first operand) at word locations selected with the order operand (third operand). This operation is similar to the operation used by the PSHUFD instruction, which is illustrated in Figure 4-16. For the PSHUFW instruction, each 2-bit field in the order operand selects the contents of one word location in the destination operand. The encodings of the order operand fields select words from the source operand to be copied to the destination operand.
2341 ///
2342 ///
2343 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PSHUFW.html).
2344 ///
2345 /// Supported operand variants:
2346 ///
2347 /// ```text
2348 /// +---+--------------+
2349 /// | # | Operands |
2350 /// +---+--------------+
2351 /// | 1 | Mm, Mem, Imm |
2352 /// | 2 | Mm, Mm, Imm |
2353 /// +---+--------------+
2354 /// ```
2355 #[inline]
2356 pub fn mmx_pshufw<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2357 where Assembler<'a>: MmxPshufwEmitter<A, B, C> {
2358 <Self as MmxPshufwEmitter<A, B, C>>::mmx_pshufw(self, op0, op1, op2);
2359 }
2360 /// `PREFETCHNTA` (PREFETCHNTA).
2361 /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2362 ///
2363 ///
2364 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2365 ///
2366 /// Supported operand variants:
2367 ///
2368 /// ```text
2369 /// +---+----------+
2370 /// | # | Operands |
2371 /// +---+----------+
2372 /// | 1 | Mem |
2373 /// +---+----------+
2374 /// ```
2375 #[inline]
2376 pub fn prefetchnta<A>(&mut self, op0: A)
2377 where Assembler<'a>: PrefetchntaEmitter<A> {
2378 <Self as PrefetchntaEmitter<A>>::prefetchnta(self, op0);
2379 }
2380 /// `PREFETCHT0` (PREFETCHT0).
2381 /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2382 ///
2383 ///
2384 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2385 ///
2386 /// Supported operand variants:
2387 ///
2388 /// ```text
2389 /// +---+----------+
2390 /// | # | Operands |
2391 /// +---+----------+
2392 /// | 1 | Mem |
2393 /// +---+----------+
2394 /// ```
2395 #[inline]
2396 pub fn prefetcht0<A>(&mut self, op0: A)
2397 where Assembler<'a>: Prefetcht0Emitter<A> {
2398 <Self as Prefetcht0Emitter<A>>::prefetcht0(self, op0);
2399 }
2400 /// `PREFETCHT1` (PREFETCHT1).
2401 /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2402 ///
2403 ///
2404 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2405 ///
2406 /// Supported operand variants:
2407 ///
2408 /// ```text
2409 /// +---+----------+
2410 /// | # | Operands |
2411 /// +---+----------+
2412 /// | 1 | Mem |
2413 /// +---+----------+
2414 /// ```
2415 #[inline]
2416 pub fn prefetcht1<A>(&mut self, op0: A)
2417 where Assembler<'a>: Prefetcht1Emitter<A> {
2418 <Self as Prefetcht1Emitter<A>>::prefetcht1(self, op0);
2419 }
2420 /// `PREFETCHT2` (PREFETCHT2).
2421 /// Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by a locality hint
2422 ///
2423 ///
2424 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/PREFETCHh.html).
2425 ///
2426 /// Supported operand variants:
2427 ///
2428 /// ```text
2429 /// +---+----------+
2430 /// | # | Operands |
2431 /// +---+----------+
2432 /// | 1 | Mem |
2433 /// +---+----------+
2434 /// ```
2435 #[inline]
2436 pub fn prefetcht2<A>(&mut self, op0: A)
2437 where Assembler<'a>: Prefetcht2Emitter<A> {
2438 <Self as Prefetcht2Emitter<A>>::prefetcht2(self, op0);
2439 }
2440 /// `SFENCE` (SFENCE).
2441 /// Orders processor execution relative to all memory stores prior to the SFENCE instruction. The processor ensures that every store prior to SFENCE is globally visible before any store after SFENCE becomes globally visible. The SFENCE instruction is ordered with respect to memory stores, other SFENCE instructions, MFENCE instructions, and any serializing instructions (such as the CPUID instruction). It is not ordered with respect to memory loads or the LFENCE instruction.
2442 ///
2443 ///
2444 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SFENCE.html).
2445 ///
2446 /// Supported operand variants:
2447 ///
2448 /// ```text
2449 /// +---+----------+
2450 /// | # | Operands |
2451 /// +---+----------+
2452 /// | 1 | (none) |
2453 /// +---+----------+
2454 /// ```
2455 #[inline]
2456 pub fn sfence(&mut self)
2457 where Assembler<'a>: SfenceEmitter {
2458 <Self as SfenceEmitter>::sfence(self);
2459 }
2460 /// `SSE_ADDPS` (ADDPS).
2461 /// Adds four, eight or sixteen packed single precision floating-point values from the first source operand with the second source operand, and stores the packed single precision floating-point result in the destination operand.
2462 ///
2463 ///
2464 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDPS.html).
2465 ///
2466 /// Supported operand variants:
2467 ///
2468 /// ```text
2469 /// +---+----------+
2470 /// | # | Operands |
2471 /// +---+----------+
2472 /// | 1 | Xmm, Mem |
2473 /// | 2 | Xmm, Xmm |
2474 /// +---+----------+
2475 /// ```
2476 #[inline]
2477 pub fn sse_addps<A, B>(&mut self, op0: A, op1: B)
2478 where Assembler<'a>: SseAddpsEmitter<A, B> {
2479 <Self as SseAddpsEmitter<A, B>>::sse_addps(self, op0, op1);
2480 }
2481 /// `SSE_ADDSS` (ADDSS).
2482 /// Adds the low single precision floating-point values from the second source operand and the first source operand, and stores the double precision floating-point result in the destination operand.
2483 ///
2484 ///
2485 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ADDSS.html).
2486 ///
2487 /// Supported operand variants:
2488 ///
2489 /// ```text
2490 /// +---+----------+
2491 /// | # | Operands |
2492 /// +---+----------+
2493 /// | 1 | Xmm, Mem |
2494 /// | 2 | Xmm, Xmm |
2495 /// +---+----------+
2496 /// ```
2497 #[inline]
2498 pub fn sse_addss<A, B>(&mut self, op0: A, op1: B)
2499 where Assembler<'a>: SseAddssEmitter<A, B> {
2500 <Self as SseAddssEmitter<A, B>>::sse_addss(self, op0, op1);
2501 }
2502 /// `SSE_ANDNPS` (ANDNPS).
2503 /// Performs a bitwise logical AND NOT of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
2504 ///
2505 ///
2506 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDNPS.html).
2507 ///
2508 /// Supported operand variants:
2509 ///
2510 /// ```text
2511 /// +---+----------+
2512 /// | # | Operands |
2513 /// +---+----------+
2514 /// | 1 | Xmm, Mem |
2515 /// | 2 | Xmm, Xmm |
2516 /// +---+----------+
2517 /// ```
2518 #[inline]
2519 pub fn sse_andnps<A, B>(&mut self, op0: A, op1: B)
2520 where Assembler<'a>: SseAndnpsEmitter<A, B> {
2521 <Self as SseAndnpsEmitter<A, B>>::sse_andnps(self, op0, op1);
2522 }
2523 /// `SSE_ANDPS` (ANDPS).
2524 /// Performs a bitwise logical AND of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.
2525 ///
2526 ///
2527 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ANDPS.html).
2528 ///
2529 /// Supported operand variants:
2530 ///
2531 /// ```text
2532 /// +---+----------+
2533 /// | # | Operands |
2534 /// +---+----------+
2535 /// | 1 | Xmm, Mem |
2536 /// | 2 | Xmm, Xmm |
2537 /// +---+----------+
2538 /// ```
2539 #[inline]
2540 pub fn sse_andps<A, B>(&mut self, op0: A, op1: B)
2541 where Assembler<'a>: SseAndpsEmitter<A, B> {
2542 <Self as SseAndpsEmitter<A, B>>::sse_andps(self, op0, op1);
2543 }
2544 /// `SSE_CMPPS` (CMPPS).
2545 /// Performs a SIMD compare of the packed single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.
2546 ///
2547 ///
2548 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPPS.html).
2549 ///
2550 /// Supported operand variants:
2551 ///
2552 /// ```text
2553 /// +---+---------------+
2554 /// | # | Operands |
2555 /// +---+---------------+
2556 /// | 1 | Xmm, Mem, Imm |
2557 /// | 2 | Xmm, Xmm, Imm |
2558 /// +---+---------------+
2559 /// ```
2560 #[inline]
2561 pub fn sse_cmpps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2562 where Assembler<'a>: SseCmppsEmitter<A, B, C> {
2563 <Self as SseCmppsEmitter<A, B, C>>::sse_cmpps(self, op0, op1, op2);
2564 }
2565 /// `SSE_CMPSS` (CMPSS).
2566 /// Compares the low single precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.
2567 ///
2568 ///
2569 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CMPSS.html).
2570 ///
2571 /// Supported operand variants:
2572 ///
2573 /// ```text
2574 /// +---+---------------+
2575 /// | # | Operands |
2576 /// +---+---------------+
2577 /// | 1 | Xmm, Mem, Imm |
2578 /// | 2 | Xmm, Xmm, Imm |
2579 /// +---+---------------+
2580 /// ```
2581 #[inline]
2582 pub fn sse_cmpss<A, B, C>(&mut self, op0: A, op1: B, op2: C)
2583 where Assembler<'a>: SseCmpssEmitter<A, B, C> {
2584 <Self as SseCmpssEmitter<A, B, C>>::sse_cmpss(self, op0, op1, op2);
2585 }
2586 /// `SSE_COMISS` (COMISS).
2587 /// Compares the single precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
2588 ///
2589 ///
2590 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/COMISS.html).
2591 ///
2592 /// Supported operand variants:
2593 ///
2594 /// ```text
2595 /// +---+----------+
2596 /// | # | Operands |
2597 /// +---+----------+
2598 /// | 1 | Xmm, Mem |
2599 /// | 2 | Xmm, Xmm |
2600 /// +---+----------+
2601 /// ```
2602 #[inline]
2603 pub fn sse_comiss<A, B>(&mut self, op0: A, op1: B)
2604 where Assembler<'a>: SseComissEmitter<A, B> {
2605 <Self as SseComissEmitter<A, B>>::sse_comiss(self, op0, op1);
2606 }
2607 /// `SSE_CVTSI2SS` (CVTSI2SS).
2608 /// Converts a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the “convert-from” source operand to a single precision floating-point value in the destination operand (first operand). The “convert-from” source operand can be a general-purpose register or a memory location. The destination operand is an XMM register. The result is stored in the low doubleword of the destination operand, and the upper three doublewords are left unchanged. When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits.
2609 ///
2610 ///
2611 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSI2SS.html).
2612 ///
2613 /// Supported operand variants:
2614 ///
2615 /// ```text
2616 /// +---+----------+
2617 /// | # | Operands |
2618 /// +---+----------+
2619 /// | 1 | Xmm, Gpd |
2620 /// | 2 | Xmm, Gpq |
2621 /// | 3 | Xmm, Mem |
2622 /// +---+----------+
2623 /// ```
2624 #[inline]
2625 pub fn sse_cvtsi2ss<A, B>(&mut self, op0: A, op1: B)
2626 where Assembler<'a>: SseCvtsi2ssEmitter<A, B> {
2627 <Self as SseCvtsi2ssEmitter<A, B>>::sse_cvtsi2ss(self, op0, op1);
2628 }
2629 /// `SSE_CVTSS2SI` (CVTSS2SI).
2630 /// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
2631 ///
2632 ///
2633 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTSS2SI.html).
2634 ///
2635 /// Supported operand variants:
2636 ///
2637 /// ```text
2638 /// +---+----------+
2639 /// | # | Operands |
2640 /// +---+----------+
2641 /// | 1 | Gpd, Mem |
2642 /// | 2 | Gpd, Xmm |
2643 /// | 3 | Gpq, Mem |
2644 /// | 4 | Gpq, Xmm |
2645 /// +---+----------+
2646 /// ```
2647 #[inline]
2648 pub fn sse_cvtss2si<A, B>(&mut self, op0: A, op1: B)
2649 where Assembler<'a>: SseCvtss2siEmitter<A, B> {
2650 <Self as SseCvtss2siEmitter<A, B>>::sse_cvtss2si(self, op0, op1);
2651 }
2652 /// `SSE_CVTTSS2SI` (CVTTSS2SI).
2653 /// Converts a single precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single precision floating-point value is contained in the low doubleword of the register.
2654 ///
2655 ///
2656 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/CVTTSS2SI.html).
2657 ///
2658 /// Supported operand variants:
2659 ///
2660 /// ```text
2661 /// +---+----------+
2662 /// | # | Operands |
2663 /// +---+----------+
2664 /// | 1 | Gpd, Mem |
2665 /// | 2 | Gpd, Xmm |
2666 /// | 3 | Gpq, Mem |
2667 /// | 4 | Gpq, Xmm |
2668 /// +---+----------+
2669 /// ```
2670 #[inline]
2671 pub fn sse_cvttss2si<A, B>(&mut self, op0: A, op1: B)
2672 where Assembler<'a>: SseCvttss2siEmitter<A, B> {
2673 <Self as SseCvttss2siEmitter<A, B>>::sse_cvttss2si(self, op0, op1);
2674 }
2675 /// `SSE_DIVPS` (DIVPS).
2676 /// Performs a SIMD divide of the four, eight or sixteen packed single precision floating-point values in the first source operand (the second operand) by the four, eight or sixteen packed single precision floating-point values in the second source operand (the third operand). Results are written to the destination operand (the first operand).
2677 ///
2678 ///
2679 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVPS.html).
2680 ///
2681 /// Supported operand variants:
2682 ///
2683 /// ```text
2684 /// +---+----------+
2685 /// | # | Operands |
2686 /// +---+----------+
2687 /// | 1 | Xmm, Mem |
2688 /// | 2 | Xmm, Xmm |
2689 /// +---+----------+
2690 /// ```
2691 #[inline]
2692 pub fn sse_divps<A, B>(&mut self, op0: A, op1: B)
2693 where Assembler<'a>: SseDivpsEmitter<A, B> {
2694 <Self as SseDivpsEmitter<A, B>>::sse_divps(self, op0, op1);
2695 }
2696 /// `SSE_DIVSS` (DIVSS).
2697 /// Divides the low single precision floating-point value in the first source operand by the low single precision floating-point value in the second source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location.
2698 ///
2699 ///
2700 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/DIVSS.html).
2701 ///
2702 /// Supported operand variants:
2703 ///
2704 /// ```text
2705 /// +---+----------+
2706 /// | # | Operands |
2707 /// +---+----------+
2708 /// | 1 | Xmm, Mem |
2709 /// | 2 | Xmm, Xmm |
2710 /// +---+----------+
2711 /// ```
2712 #[inline]
2713 pub fn sse_divss<A, B>(&mut self, op0: A, op1: B)
2714 where Assembler<'a>: SseDivssEmitter<A, B> {
2715 <Self as SseDivssEmitter<A, B>>::sse_divss(self, op0, op1);
2716 }
2717 /// `SSE_MAXPS` (MAXPS).
2718 /// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand.
2719 ///
2720 ///
2721 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXPS.html).
2722 ///
2723 /// Supported operand variants:
2724 ///
2725 /// ```text
2726 /// +---+----------+
2727 /// | # | Operands |
2728 /// +---+----------+
2729 /// | 1 | Xmm, Mem |
2730 /// | 2 | Xmm, Xmm |
2731 /// +---+----------+
2732 /// ```
2733 #[inline]
2734 pub fn sse_maxps<A, B>(&mut self, op0: A, op1: B)
2735 where Assembler<'a>: SseMaxpsEmitter<A, B> {
2736 <Self as SseMaxpsEmitter<A, B>>::sse_maxps(self, op0, op1);
2737 }
2738 /// `SSE_MAXSS` (MAXSS).
2739 /// Compares the low single precision floating-point values in the first source operand and the second source operand, and returns the maximum value to the low doubleword of the destination operand.
2740 ///
2741 ///
2742 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MAXSS.html).
2743 ///
2744 /// Supported operand variants:
2745 ///
2746 /// ```text
2747 /// +---+----------+
2748 /// | # | Operands |
2749 /// +---+----------+
2750 /// | 1 | Xmm, Mem |
2751 /// | 2 | Xmm, Xmm |
2752 /// +---+----------+
2753 /// ```
2754 #[inline]
2755 pub fn sse_maxss<A, B>(&mut self, op0: A, op1: B)
2756 where Assembler<'a>: SseMaxssEmitter<A, B> {
2757 <Self as SseMaxssEmitter<A, B>>::sse_maxss(self, op0, op1);
2758 }
2759 /// `SSE_MINPS` (MINPS).
2760 /// Performs a SIMD compare of the packed single precision floating-point values in the first source operand and the second source operand and returns the minimum value for each pair of values to the destination operand.
2761 ///
2762 ///
2763 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINPS.html).
2764 ///
2765 /// Supported operand variants:
2766 ///
2767 /// ```text
2768 /// +---+----------+
2769 /// | # | Operands |
2770 /// +---+----------+
2771 /// | 1 | Xmm, Mem |
2772 /// | 2 | Xmm, Xmm |
2773 /// +---+----------+
2774 /// ```
2775 #[inline]
2776 pub fn sse_minps<A, B>(&mut self, op0: A, op1: B)
2777 where Assembler<'a>: SseMinpsEmitter<A, B> {
2778 <Self as SseMinpsEmitter<A, B>>::sse_minps(self, op0, op1);
2779 }
2780 /// `SSE_MINSS` (MINSS).
2781 /// Compares the low single precision floating-point values in the first source operand and the second source operand and returns the minimum value to the low doubleword of the destination operand.
2782 ///
2783 ///
2784 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MINSS.html).
2785 ///
2786 /// Supported operand variants:
2787 ///
2788 /// ```text
2789 /// +---+----------+
2790 /// | # | Operands |
2791 /// +---+----------+
2792 /// | 1 | Xmm, Mem |
2793 /// | 2 | Xmm, Xmm |
2794 /// +---+----------+
2795 /// ```
2796 #[inline]
2797 pub fn sse_minss<A, B>(&mut self, op0: A, op1: B)
2798 where Assembler<'a>: SseMinssEmitter<A, B> {
2799 <Self as SseMinssEmitter<A, B>>::sse_minss(self, op0, op1);
2800 }
2801 /// `SSE_MOVAPS` (MOVAPS).
2802 /// Moves 4, 8 or 16 single precision floating-point values from the source operand (second operand) to the destination operand (first operand). This instruction can be used to load an XMM, YMM or ZMM register from an 128-bit, 256-bit or 512-bit memory location, to store the contents of an XMM, YMM or ZMM register into a 128-bit, 256-bit or 512-bit memory location, or to move data between two XMM, two YMM or two ZMM registers.
2803 ///
2804 ///
2805 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVAPS.html).
2806 ///
2807 /// Supported operand variants:
2808 ///
2809 /// ```text
2810 /// +---+----------+
2811 /// | # | Operands |
2812 /// +---+----------+
2813 /// | 1 | Mem, Xmm |
2814 /// | 2 | Xmm, Mem |
2815 /// | 3 | Xmm, Xmm |
2816 /// +---+----------+
2817 /// ```
2818 #[inline]
2819 pub fn sse_movaps<A, B>(&mut self, op0: A, op1: B)
2820 where Assembler<'a>: SseMovapsEmitter<A, B> {
2821 <Self as SseMovapsEmitter<A, B>>::sse_movaps(self, op0, op1);
2822 }
2823 /// `SSE_MOVHLPS` (MOVHLPS).
2824 /// This instruction cannot be used for memory to register moves.
2825 ///
2826 ///
2827 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHLPS.html).
2828 ///
2829 /// Supported operand variants:
2830 ///
2831 /// ```text
2832 /// +---+----------+
2833 /// | # | Operands |
2834 /// +---+----------+
2835 /// | 1 | Xmm, Xmm |
2836 /// +---+----------+
2837 /// ```
2838 #[inline]
2839 pub fn sse_movhlps<A, B>(&mut self, op0: A, op1: B)
2840 where Assembler<'a>: SseMovhlpsEmitter<A, B> {
2841 <Self as SseMovhlpsEmitter<A, B>>::sse_movhlps(self, op0, op1);
2842 }
2843 /// `SSE_MOVHPS` (MOVHPS).
2844 /// This instruction cannot be used for register to register or memory to memory moves.
2845 ///
2846 ///
2847 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVHPS.html).
2848 ///
2849 /// Supported operand variants:
2850 ///
2851 /// ```text
2852 /// +---+----------+
2853 /// | # | Operands |
2854 /// +---+----------+
2855 /// | 1 | Mem, Xmm |
2856 /// | 2 | Xmm, Mem |
2857 /// +---+----------+
2858 /// ```
2859 #[inline]
2860 pub fn sse_movhps<A, B>(&mut self, op0: A, op1: B)
2861 where Assembler<'a>: SseMovhpsEmitter<A, B> {
2862 <Self as SseMovhpsEmitter<A, B>>::sse_movhps(self, op0, op1);
2863 }
2864 /// `SSE_MOVLHPS` (MOVLHPS).
2865 /// This instruction cannot be used for memory to register moves.
2866 ///
2867 ///
2868 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLHPS.html).
2869 ///
2870 /// Supported operand variants:
2871 ///
2872 /// ```text
2873 /// +---+----------+
2874 /// | # | Operands |
2875 /// +---+----------+
2876 /// | 1 | Xmm, Xmm |
2877 /// +---+----------+
2878 /// ```
2879 #[inline]
2880 pub fn sse_movlhps<A, B>(&mut self, op0: A, op1: B)
2881 where Assembler<'a>: SseMovlhpsEmitter<A, B> {
2882 <Self as SseMovlhpsEmitter<A, B>>::sse_movlhps(self, op0, op1);
2883 }
2884 /// `SSE_MOVLPS` (MOVLPS).
2885 /// This instruction cannot be used for register to register or memory to memory moves.
2886 ///
2887 ///
2888 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVLPS.html).
2889 ///
2890 /// Supported operand variants:
2891 ///
2892 /// ```text
2893 /// +---+----------+
2894 /// | # | Operands |
2895 /// +---+----------+
2896 /// | 1 | Mem, Xmm |
2897 /// | 2 | Xmm, Mem |
2898 /// +---+----------+
2899 /// ```
2900 #[inline]
2901 pub fn sse_movlps<A, B>(&mut self, op0: A, op1: B)
2902 where Assembler<'a>: SseMovlpsEmitter<A, B> {
2903 <Self as SseMovlpsEmitter<A, B>>::sse_movlps(self, op0, op1);
2904 }
2905 /// `SSE_MOVMSKPS` (MOVMSKPS).
2906 /// Extracts the sign bits from the packed single precision floating-point values in the source operand (second operand), formats them into a 4- or 8-bit mask, and stores the mask in the destination operand (first operand). The source operand is an XMM or YMM register, and the destination operand is a general-purpose register. The mask is stored in the 4 or 8 low-order bits of the destination operand. The upper bits of the destination operand beyond the mask are filled with zeros.
2907 ///
2908 ///
2909 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVMSKPS.html).
2910 ///
2911 /// Supported operand variants:
2912 ///
2913 /// ```text
2914 /// +---+----------+
2915 /// | # | Operands |
2916 /// +---+----------+
2917 /// | 1 | Gpq, Xmm |
2918 /// +---+----------+
2919 /// ```
2920 #[inline]
2921 pub fn sse_movmskps<A, B>(&mut self, op0: A, op1: B)
2922 where Assembler<'a>: SseMovmskpsEmitter<A, B> {
2923 <Self as SseMovmskpsEmitter<A, B>>::sse_movmskps(self, op0, op1);
2924 }
2925 /// `SSE_MOVNTPS` (MOVNTPS).
2926 /// Moves the packed single precision floating-point values in the source operand (second operand) to the destination operand (first operand) using a non-temporal hint to prevent caching of the data during the write to memory. The source operand is an XMM register, YMM register or ZMM register, which is assumed to contain packed single precision, floating-pointing. The destination operand is a 128-bit, 256-bit or 512-bit memory location. The memory operand must be aligned on a 16-byte (128-bit version), 32-byte (VEX.256 encoded version) or 64-byte (EVEX.512 encoded version) boundary otherwise a general-protection exception (#GP) will be generated.
2927 ///
2928 ///
2929 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVNTPS.html).
2930 ///
2931 /// Supported operand variants:
2932 ///
2933 /// ```text
2934 /// +---+----------+
2935 /// | # | Operands |
2936 /// +---+----------+
2937 /// | 1 | Mem, Xmm |
2938 /// +---+----------+
2939 /// ```
2940 #[inline]
2941 pub fn sse_movntps<A, B>(&mut self, op0: A, op1: B)
2942 where Assembler<'a>: SseMovntpsEmitter<A, B> {
2943 <Self as SseMovntpsEmitter<A, B>>::sse_movntps(self, op0, op1);
2944 }
2945 /// `SSE_MOVNTSS`.
2946 ///
2947 /// Supported operand variants:
2948 ///
2949 /// ```text
2950 /// +---+----------+
2951 /// | # | Operands |
2952 /// +---+----------+
2953 /// | 1 | Mem, Xmm |
2954 /// +---+----------+
2955 /// ```
2956 #[inline]
2957 pub fn sse_movntss<A, B>(&mut self, op0: A, op1: B)
2958 where Assembler<'a>: SseMovntssEmitter<A, B> {
2959 <Self as SseMovntssEmitter<A, B>>::sse_movntss(self, op0, op1);
2960 }
2961 /// `SSE_MOVSS` (MOVSS).
2962 /// Moves a scalar single precision floating-point value from the source operand (second operand) to the destination operand (first operand). The source and destination operands can be XMM registers or 32-bit memory locations. This instruction can be used to move a single precision floating-point value to and from the low doubleword of an XMM register and a 32-bit memory location, or to move a single precision floating-point value between the low doublewords of two XMM registers. The instruction cannot be used to transfer data between memory locations.
2963 ///
2964 ///
2965 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVSS.html).
2966 ///
2967 /// Supported operand variants:
2968 ///
2969 /// ```text
2970 /// +---+----------+
2971 /// | # | Operands |
2972 /// +---+----------+
2973 /// | 1 | Mem, Xmm |
2974 /// | 2 | Xmm, Mem |
2975 /// | 3 | Xmm, Xmm |
2976 /// +---+----------+
2977 /// ```
2978 #[inline]
2979 pub fn sse_movss<A, B>(&mut self, op0: A, op1: B)
2980 where Assembler<'a>: SseMovssEmitter<A, B> {
2981 <Self as SseMovssEmitter<A, B>>::sse_movss(self, op0, op1);
2982 }
2983 /// `SSE_MOVUPS` (MOVUPS).
2984 /// Note: VEX.vvvv and EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.
2985 ///
2986 ///
2987 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MOVUPS.html).
2988 ///
2989 /// Supported operand variants:
2990 ///
2991 /// ```text
2992 /// +---+----------+
2993 /// | # | Operands |
2994 /// +---+----------+
2995 /// | 1 | Mem, Xmm |
2996 /// | 2 | Xmm, Mem |
2997 /// | 3 | Xmm, Xmm |
2998 /// +---+----------+
2999 /// ```
3000 #[inline]
3001 pub fn sse_movups<A, B>(&mut self, op0: A, op1: B)
3002 where Assembler<'a>: SseMovupsEmitter<A, B> {
3003 <Self as SseMovupsEmitter<A, B>>::sse_movups(self, op0, op1);
3004 }
3005 /// `SSE_MULPS` (MULPS).
3006 /// Multiply the packed single precision floating-point values from the first source operand with the corresponding values in the second source operand, and stores the packed double precision floating-point results in the destination operand.
3007 ///
3008 ///
3009 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULPS.html).
3010 ///
3011 /// Supported operand variants:
3012 ///
3013 /// ```text
3014 /// +---+----------+
3015 /// | # | Operands |
3016 /// +---+----------+
3017 /// | 1 | Xmm, Mem |
3018 /// | 2 | Xmm, Xmm |
3019 /// +---+----------+
3020 /// ```
3021 #[inline]
3022 pub fn sse_mulps<A, B>(&mut self, op0: A, op1: B)
3023 where Assembler<'a>: SseMulpsEmitter<A, B> {
3024 <Self as SseMulpsEmitter<A, B>>::sse_mulps(self, op0, op1);
3025 }
3026 /// `SSE_MULSS` (MULSS).
3027 /// Multiplies the low single precision floating-point value from the second source operand by the low single precision floating-point value in the first source operand, and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source operand and the destination operands are XMM registers.
3028 ///
3029 ///
3030 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/MULSS.html).
3031 ///
3032 /// Supported operand variants:
3033 ///
3034 /// ```text
3035 /// +---+----------+
3036 /// | # | Operands |
3037 /// +---+----------+
3038 /// | 1 | Xmm, Mem |
3039 /// | 2 | Xmm, Xmm |
3040 /// +---+----------+
3041 /// ```
3042 #[inline]
3043 pub fn sse_mulss<A, B>(&mut self, op0: A, op1: B)
3044 where Assembler<'a>: SseMulssEmitter<A, B> {
3045 <Self as SseMulssEmitter<A, B>>::sse_mulss(self, op0, op1);
3046 }
3047 /// `SSE_ORPS` (ORPS).
3048 /// Performs a bitwise logical OR of the four, eight or sixteen packed single precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
3049 ///
3050 ///
3051 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/ORPS.html).
3052 ///
3053 /// Supported operand variants:
3054 ///
3055 /// ```text
3056 /// +---+----------+
3057 /// | # | Operands |
3058 /// +---+----------+
3059 /// | 1 | Xmm, Mem |
3060 /// | 2 | Xmm, Xmm |
3061 /// +---+----------+
3062 /// ```
3063 #[inline]
3064 pub fn sse_orps<A, B>(&mut self, op0: A, op1: B)
3065 where Assembler<'a>: SseOrpsEmitter<A, B> {
3066 <Self as SseOrpsEmitter<A, B>>::sse_orps(self, op0, op1);
3067 }
3068 /// `SSE_RCPPS` (RCPPS).
3069 /// Performs a SIMD computation of the approximate reciprocals of the four packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3070 ///
3071 ///
3072 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPPS.html).
3073 ///
3074 /// Supported operand variants:
3075 ///
3076 /// ```text
3077 /// +---+----------+
3078 /// | # | Operands |
3079 /// +---+----------+
3080 /// | 1 | Xmm, Mem |
3081 /// | 2 | Xmm, Xmm |
3082 /// +---+----------+
3083 /// ```
3084 #[inline]
3085 pub fn sse_rcpps<A, B>(&mut self, op0: A, op1: B)
3086 where Assembler<'a>: SseRcppsEmitter<A, B> {
3087 <Self as SseRcppsEmitter<A, B>>::sse_rcpps(self, op0, op1);
3088 }
3089 /// `SSE_RCPSS` (RCPSS).
3090 /// Computes of an approximate reciprocal of the low single precision floating-point value in the source operand (second operand) and stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3091 ///
3092 ///
3093 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RCPSS.html).
3094 ///
3095 /// Supported operand variants:
3096 ///
3097 /// ```text
3098 /// +---+----------+
3099 /// | # | Operands |
3100 /// +---+----------+
3101 /// | 1 | Xmm, Mem |
3102 /// | 2 | Xmm, Xmm |
3103 /// +---+----------+
3104 /// ```
3105 #[inline]
3106 pub fn sse_rcpss<A, B>(&mut self, op0: A, op1: B)
3107 where Assembler<'a>: SseRcpssEmitter<A, B> {
3108 <Self as SseRcpssEmitter<A, B>>::sse_rcpss(self, op0, op1);
3109 }
3110 /// `SSE_RSQRTPS` (RSQRTPS).
3111 /// Performs a SIMD computation of the approximate reciprocals of the square roots of the four packed single precision floating-point values in the source operand (second operand) and stores the packed single precision floating-point results in the destination operand. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. See Figure 10-5 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a SIMD single precision floating-point operation.
3112 ///
3113 ///
3114 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTPS.html).
3115 ///
3116 /// Supported operand variants:
3117 ///
3118 /// ```text
3119 /// +---+----------+
3120 /// | # | Operands |
3121 /// +---+----------+
3122 /// | 1 | Xmm, Mem |
3123 /// | 2 | Xmm, Xmm |
3124 /// +---+----------+
3125 /// ```
3126 #[inline]
3127 pub fn sse_rsqrtps<A, B>(&mut self, op0: A, op1: B)
3128 where Assembler<'a>: SseRsqrtpsEmitter<A, B> {
3129 <Self as SseRsqrtpsEmitter<A, B>>::sse_rsqrtps(self, op0, op1);
3130 }
3131 /// `SSE_RSQRTSS` (RSQRTSS).
3132 /// Computes an approximate reciprocal of the square root of the low single precision floating-point value in the source operand (second operand) stores the single precision floating-point result in the destination operand. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register. The three high-order doublewords of the destination operand remain unchanged. See Figure 10-6 in the Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1, for an illustration of a scalar single precision floating-point operation.
3133 ///
3134 ///
3135 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/RSQRTSS.html).
3136 ///
3137 /// Supported operand variants:
3138 ///
3139 /// ```text
3140 /// +---+----------+
3141 /// | # | Operands |
3142 /// +---+----------+
3143 /// | 1 | Xmm, Mem |
3144 /// | 2 | Xmm, Xmm |
3145 /// +---+----------+
3146 /// ```
3147 #[inline]
3148 pub fn sse_rsqrtss<A, B>(&mut self, op0: A, op1: B)
3149 where Assembler<'a>: SseRsqrtssEmitter<A, B> {
3150 <Self as SseRsqrtssEmitter<A, B>>::sse_rsqrtss(self, op0, op1);
3151 }
3152 /// `SSE_SHUFPS` (SHUFPS).
3153 /// Selects a single precision floating-point value of an input quadruplet using a two-bit control and move to a designated element of the destination operand. Each 64-bit element-pair of a 128-bit lane of the destination operand is interleaved between the corresponding lane of the first source operand and the second source operand at the granularity 128 bits. Each two bits in the imm8 byte, starting from bit 0, is the select control of the corresponding element of a 128-bit lane of the destination to received the shuffled result of an input quadruplet. The two lower elements of a 128-bit lane in the destination receives shuffle results from the quadruple of the first source operand. The next two elements of the destination receives shuffle results from the quadruple of the second source operand.
3154 ///
3155 ///
3156 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SHUFPS.html).
3157 ///
3158 /// Supported operand variants:
3159 ///
3160 /// ```text
3161 /// +---+---------------+
3162 /// | # | Operands |
3163 /// +---+---------------+
3164 /// | 1 | Xmm, Mem, Imm |
3165 /// | 2 | Xmm, Xmm, Imm |
3166 /// +---+---------------+
3167 /// ```
3168 #[inline]
3169 pub fn sse_shufps<A, B, C>(&mut self, op0: A, op1: B, op2: C)
3170 where Assembler<'a>: SseShufpsEmitter<A, B, C> {
3171 <Self as SseShufpsEmitter<A, B, C>>::sse_shufps(self, op0, op1, op2);
3172 }
3173 /// `SSE_SQRTPS` (SQRTPS).
3174 /// Performs a SIMD computation of the square roots of the four, eight or sixteen packed single precision floating-point values in the source operand (second operand) stores the packed single precision floating-point results in the destination operand.
3175 ///
3176 ///
3177 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTPS.html).
3178 ///
3179 /// Supported operand variants:
3180 ///
3181 /// ```text
3182 /// +---+----------+
3183 /// | # | Operands |
3184 /// +---+----------+
3185 /// | 1 | Xmm, Mem |
3186 /// | 2 | Xmm, Xmm |
3187 /// +---+----------+
3188 /// ```
3189 #[inline]
3190 pub fn sse_sqrtps<A, B>(&mut self, op0: A, op1: B)
3191 where Assembler<'a>: SseSqrtpsEmitter<A, B> {
3192 <Self as SseSqrtpsEmitter<A, B>>::sse_sqrtps(self, op0, op1);
3193 }
3194 /// `SSE_SQRTSS` (SQRTSS).
3195 /// Computes the square root of the low single precision floating-point value in the second source operand and stores the single precision floating-point result in the destination operand. The second source operand can be an XMM register or a 32-bit memory location. The first source and destination operands is an XMM register.
3196 ///
3197 ///
3198 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SQRTSS.html).
3199 ///
3200 /// Supported operand variants:
3201 ///
3202 /// ```text
3203 /// +---+----------+
3204 /// | # | Operands |
3205 /// +---+----------+
3206 /// | 1 | Xmm, Mem |
3207 /// | 2 | Xmm, Xmm |
3208 /// +---+----------+
3209 /// ```
3210 #[inline]
3211 pub fn sse_sqrtss<A, B>(&mut self, op0: A, op1: B)
3212 where Assembler<'a>: SseSqrtssEmitter<A, B> {
3213 <Self as SseSqrtssEmitter<A, B>>::sse_sqrtss(self, op0, op1);
3214 }
3215 /// `SSE_SUBPS` (SUBPS).
3216 /// Performs a SIMD subtract of the packed single precision floating-point values in the second Source operand from the First Source operand, and stores the packed single precision floating-point results in the destination operand.
3217 ///
3218 ///
3219 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBPS.html).
3220 ///
3221 /// Supported operand variants:
3222 ///
3223 /// ```text
3224 /// +---+----------+
3225 /// | # | Operands |
3226 /// +---+----------+
3227 /// | 1 | Xmm, Mem |
3228 /// | 2 | Xmm, Xmm |
3229 /// +---+----------+
3230 /// ```
3231 #[inline]
3232 pub fn sse_subps<A, B>(&mut self, op0: A, op1: B)
3233 where Assembler<'a>: SseSubpsEmitter<A, B> {
3234 <Self as SseSubpsEmitter<A, B>>::sse_subps(self, op0, op1);
3235 }
3236 /// `SSE_SUBSS` (SUBSS).
3237 /// Subtract the low single precision floating-point value from the second source operand and the first source operand and store the double precision floating-point result in the low doubleword of the destination operand.
3238 ///
3239 ///
3240 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/SUBSS.html).
3241 ///
3242 /// Supported operand variants:
3243 ///
3244 /// ```text
3245 /// +---+----------+
3246 /// | # | Operands |
3247 /// +---+----------+
3248 /// | 1 | Xmm, Mem |
3249 /// | 2 | Xmm, Xmm |
3250 /// +---+----------+
3251 /// ```
3252 #[inline]
3253 pub fn sse_subss<A, B>(&mut self, op0: A, op1: B)
3254 where Assembler<'a>: SseSubssEmitter<A, B> {
3255 <Self as SseSubssEmitter<A, B>>::sse_subss(self, op0, op1);
3256 }
3257 /// `SSE_UCOMISS` (UCOMISS).
3258 /// Compares the single precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF, and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).
3259 ///
3260 ///
3261 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UCOMISS.html).
3262 ///
3263 /// Supported operand variants:
3264 ///
3265 /// ```text
3266 /// +---+----------+
3267 /// | # | Operands |
3268 /// +---+----------+
3269 /// | 1 | Xmm, Mem |
3270 /// | 2 | Xmm, Xmm |
3271 /// +---+----------+
3272 /// ```
3273 #[inline]
3274 pub fn sse_ucomiss<A, B>(&mut self, op0: A, op1: B)
3275 where Assembler<'a>: SseUcomissEmitter<A, B> {
3276 <Self as SseUcomissEmitter<A, B>>::sse_ucomiss(self, op0, op1);
3277 }
3278 /// `SSE_UNPCKHPS` (UNPCKHPS).
3279 /// Performs an interleaved unpack of the high single precision floating-point values from the first source operand and the second source operand.
3280 ///
3281 ///
3282 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKHPS.html).
3283 ///
3284 /// Supported operand variants:
3285 ///
3286 /// ```text
3287 /// +---+----------+
3288 /// | # | Operands |
3289 /// +---+----------+
3290 /// | 1 | Xmm, Mem |
3291 /// | 2 | Xmm, Xmm |
3292 /// +---+----------+
3293 /// ```
3294 #[inline]
3295 pub fn sse_unpckhps<A, B>(&mut self, op0: A, op1: B)
3296 where Assembler<'a>: SseUnpckhpsEmitter<A, B> {
3297 <Self as SseUnpckhpsEmitter<A, B>>::sse_unpckhps(self, op0, op1);
3298 }
3299 /// `SSE_UNPCKLPS` (UNPCKLPS).
3300 /// Performs an interleaved unpack of the low single precision floating-point values from the first source operand and the second source operand.
3301 ///
3302 ///
3303 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/UNPCKLPS.html).
3304 ///
3305 /// Supported operand variants:
3306 ///
3307 /// ```text
3308 /// +---+----------+
3309 /// | # | Operands |
3310 /// +---+----------+
3311 /// | 1 | Xmm, Mem |
3312 /// | 2 | Xmm, Xmm |
3313 /// +---+----------+
3314 /// ```
3315 #[inline]
3316 pub fn sse_unpcklps<A, B>(&mut self, op0: A, op1: B)
3317 where Assembler<'a>: SseUnpcklpsEmitter<A, B> {
3318 <Self as SseUnpcklpsEmitter<A, B>>::sse_unpcklps(self, op0, op1);
3319 }
3320 /// `SSE_XORPS` (XORPS).
3321 /// Performs a bitwise logical XOR of the four, eight or sixteen packed single-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand
3322 ///
3323 ///
3324 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/XORPS.html).
3325 ///
3326 /// Supported operand variants:
3327 ///
3328 /// ```text
3329 /// +---+----------+
3330 /// | # | Operands |
3331 /// +---+----------+
3332 /// | 1 | Xmm, Mem |
3333 /// | 2 | Xmm, Xmm |
3334 /// +---+----------+
3335 /// ```
3336 #[inline]
3337 pub fn sse_xorps<A, B>(&mut self, op0: A, op1: B)
3338 where Assembler<'a>: SseXorpsEmitter<A, B> {
3339 <Self as SseXorpsEmitter<A, B>>::sse_xorps(self, op0, op1);
3340 }
3341 /// `STMXCSR` (STMXCSR).
3342 /// Stores the contents of the MXCSR control and status register to the destination operand. The destination operand is a 32-bit memory location. The reserved bits in the MXCSR register are stored as 0s.
3343 ///
3344 ///
3345 /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/STMXCSR.html).
3346 ///
3347 /// Supported operand variants:
3348 ///
3349 /// ```text
3350 /// +---+----------+
3351 /// | # | Operands |
3352 /// +---+----------+
3353 /// | 1 | Mem |
3354 /// +---+----------+
3355 /// ```
3356 #[inline]
3357 pub fn stmxcsr<A>(&mut self, op0: A)
3358 where Assembler<'a>: StmxcsrEmitter<A> {
3359 <Self as StmxcsrEmitter<A>>::stmxcsr(self, op0);
3360 }
3361}