cranelift_codegen/isa/x64/lower/isle.rs

//! ISLE integration glue code for x64 lowering.

// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::{ir::AtomicRmwOp, ir::types};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

// Types that the generated ISLE code uses via `use super::*`.
use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::X64Backend;
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::boxed::Box;
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;
/// Type representing out-of-line data for calls. This type is optional
/// because the call instruction is also used by Winch to emit calls, where
/// the `Box<CallInfo>` field goes unused since it is only needed by
/// Cranelift. Making the field optional reduces the number of heap
/// allocations in Winch.
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

/// When interacting with the external assembler (see `external.rs`), we
/// need to fix the types we'll use.
type AssemblerInst = asm::Inst<CraneliftRegisters>;

pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_assembler_methods!();

    fn gen_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
        patchable: bool,
    ) -> BoxCallInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest, uses, defs, try_call_info, patchable),
        )
    }

    fn gen_call_ind_info(
        &mut self,
        sig: Sig,
        dest: &RegMem,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallIndInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest.clone(), uses, defs, try_call_info, false),
        )
    }

    fn gen_return_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
    ) -> BoxReturnCallInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    fn gen_return_call_ind_info(
        &mut self,
        sig: Sig,
        dest: Reg,
        uses: CallArgList,
    ) -> BoxReturnCallIndInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                };
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return XmmMemImm::unwrap_new(RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                });
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a rematerialization into a register,
            // because it reduces register pressure.
            //
            // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
            // constants to be 16 bytes when a constant will be used in place of an xmm register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a
            // rematerialization into a register, because it reduces
            // register pressure.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn has_avx(&mut self) -> bool {
        self.backend.x64_flags.has_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_avx2()
    }

    #[inline]
    fn has_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vl()
    }

    #[inline]
    fn has_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.has_avx512dq()
    }

    #[inline]
    fn has_avx512f(&mut self) -> bool {
        self.backend.x64_flags.has_avx512f()
    }

    #[inline]
    fn has_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.has_avx512bitalg()
    }

    #[inline]
    fn has_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vbmi()
    }

    #[inline]
    fn has_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.has_lzcnt()
    }

    #[inline]
    fn has_bmi1(&mut self) -> bool {
        self.backend.x64_flags.has_bmi1()
    }

    #[inline]
    fn has_bmi2(&mut self) -> bool {
        self.backend.x64_flags.has_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.has_popcnt() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_fma()
    }

    #[inline]
    fn has_sse3(&mut self) -> bool {
        self.backend.x64_flags.has_sse3()
    }

    #[inline]
    fn has_ssse3(&mut self) -> bool {
        self.backend.x64_flags.has_ssse3()
    }

    #[inline]
    fn has_sse41(&mut self) -> bool {
        self.backend.x64_flags.has_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.has_sse41() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn has_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.has_cmpxchg16b()
    }

    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }
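
    // Worked example for `shift_amount_masked` above: for `I32` lanes the
    // mask is 31, so an `Imm64` shift amount of 40 is masked down to 8,
    // matching x86's own truncation of shift amounts.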

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let imm = self.i64_from_iconst(val)?;
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: i32::try_from(imm).ok()?.cast_unsigned(),
        }))
    }

    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // Insert 32 bits from the replacement (source element 0, selected by
        // bits 7:6) into the vector at `lane` (shifted into bits 5:4); the
        // zero mask in bits 3:0 is left clear.
        0b00_00_00_00 | lane << 4
    }
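
    // Worked example for `sse_insertps_lane_imm` above: `lane = 2` yields
    // `0b0010_0000` (0x20), i.e. take element 0 of the source, write it to
    // lane 2 of the destination, and zero nothing.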

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn synthetic_amode_slot(&mut self, offset: i32) -> SyntheticAmode {
        SyntheticAmode::SlotOffset { simm32: offset }
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // If necessary, zero extend `constant_value` up to 64 bits.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        // Sum up the two operands.
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // Check that the sum will fit in 32-bits.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }
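
    // Worked example for `sum_extend_fits_in_32_bits` above: an `I8`
    // constant of 0xFF zero-extends to 255; with an `Offset32` of 1 the sum
    // is 256, which survives the 32-bit sign-extension round trip, so
    // `Some(256)` is returned.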

    #[inline]
    fn amode_offset(&mut self, addr: &SyntheticAmode, offset: i32) -> SyntheticAmode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
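
    // Worked example for `pshufd_lhs_imm` above: the identity selection
    // (0, 1, 2, 3) packs to 0b11_10_01_00 (0xe4); each selector occupies two
    // bits of the immediate, lowest selector in the least-significant bits.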

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting from the right-hand side, subtract 4 from each
        // selector, bailing out if any selector is less than 4. Afterwards
        // the check is the same as `pshufd_lhs_imm` above.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The `shufps` instruction selects the first two elements from the
        // first vector and the second two elements from the second vector, so
        // offset the third/fourth selectors by 4 and then make sure each
        // selector fits in its two bits of the 8-bit immediate.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // This is almost the same as `shufps_imm` except that the selectors
        // being offset are reversed. This handles the case where a `shufps`
        // instruction can be emitted if the order of the operands is swapped.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `shufps` except this operates over 16-bit values, so
        // four of them must be fixed and the other four must be in range to
        // encode in the immediate.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `pshuflw` except that the first four selectors must be
        // fixed and the second four are offset by an extra 4 and tested to
        // make sure they're all in the range [4, 8).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Note that everything here is offset by at least 8, and the upper
        // four lanes are offset by 12 to test that they're in the range
        // [12, 16).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        // First make sure that the shuffle immediate is selecting 16-bit lanes.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        // Next build up an 8-bit mask from each of the bits of the selected
        // lanes above. This instruction can only be used when each lane
        // selector chooses from the corresponding lane in either of the two
        // operands, meaning the Nth lane selection must satisfy `lane % 8 ==
        // N`.
        //
        // This helper closure is used to calculate the value of the
        // corresponding bit.
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 { Some(0) } else { Some(1 << c) }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }
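
    // Worked example for `pblendw_imm` above: the shuffle
    // (0, 9, 2, 11, 4, 13, 6, 15) takes the odd lanes from the second
    // operand, so the bits for lanes 1, 3, 5, and 7 are set and the
    // resulting immediate is 0b1010_1010 (0xaa).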

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }

    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }
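
    // Worked example for `insert_i8x16_lane_hole` above: `hole_idx = 2`
    // produces an all-ones constant with byte 2 cleared, i.e. lane 2 of the
    // i8x16 vector is the "hole" to insert into.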

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

    ////////////////////////////////////////////////////////////////////////////
    ///// External assembler methods.
    ////////////////////////////////////////////////////////////////////////////

    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32 as i32),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { .. } => None,
            RegMem::Mem { addr } => Some(addr),
        }
    }

    // Custom constructors for `mulx` that only compute the high half of the
    // result, so the same output operand is used for both destination
    // registers. This is in contrast to the assembler-generated version of
    // this instruction, which allocates two distinct temporary registers for
    // the outputs and computes both the high and low halves of the result.

    fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn bt_imm(&mut self, val: u64) -> Option<u8> {
        if val.count_ones() == 1 {
            Some(u8::try_from(val.trailing_zeros()).unwrap())
        } else {
            None
        }
    }
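
    // Worked examples for `bt_imm` above: `bt_imm(0x8000_0000)` returns
    // `Some(31)` (a single set bit at index 31), while `bt_imm(6)` returns
    // `None` because two bits are set.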
}
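
// A minimal, self-contained sketch (not part of the upstream source) of the
// two-bit lane packing shared by `pshufd_lhs_imm`, `shufps_imm`, and the
// other shuffle-immediate helpers above: each of the four selectors occupies
// two bits of the 8-bit immediate, lowest selector in the lowest bits.
#[cfg(test)]
mod shuffle_imm_packing_sketch {
    // Standalone re-implementation of the packing expression, for
    // illustration only.
    fn pack(a: u8, b: u8, c: u8, d: u8) -> u8 {
        a | (b << 2) | (c << 4) | (d << 6)
    }

    #[test]
    fn packs_two_bits_per_selector() {
        // The identity shuffle (0, 1, 2, 3) encodes as 0b11_10_01_00.
        assert_eq!(pack(0, 1, 2, 3), 0b11_10_01_00);
        // Reversing the lanes (3, 2, 1, 0) encodes as 0b00_01_10_11.
        assert_eq!(pack(3, 2, 1, 0), 0b00_01_10_11);
    }
}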

impl IsleContext<'_, '_, MInst, X64Backend> {
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::External {
            inst: asm::inst::movdqu_a::new(
                Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
                asm::XmmMem::Mem(addr.into()),
            )
            .into(),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
        &mut self,
        read: Gpr,
    ) -> asm::Fixed<PairedGpr, E> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Fixed(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
        &self,
        read: &XmmMemAligned,
    ) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem(
        &self,
        write: &XmmMem,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
        &self,
        write: &XmmMemAligned,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<WritableGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
        amode.clone().into()
    }
}

// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
// need to fix up the bits that migrate from one half of the lane to the
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
// right by 0 (no movement), we want to retain all the bits so we mask with
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
// we mask with `0x7f`; etc.

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];
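
// A hedged, self-contained sketch (not part of the upstream source) of why
// the mask tables above work: an 8-lane byte shift is emulated with a
// 16-bit-lane shift followed by a mask that clears the bits which migrated
// across the byte boundary.
#[cfg(test)]
mod i8x16_shift_mask_sketch {
    use super::I8X16_ISHL_MASKS;

    #[test]
    fn shl_by_3_matches_per_byte_shift() {
        let amt = 3u32;
        let lanes: [u8; 2] = [0b1011_0101, 0b0110_1101];
        // Shift the two lanes together as one little-endian 16-bit value,
        // the way a 16x8 `psllw` would.
        let shifted = (u16::from_le_bytes(lanes) << amt).to_le_bytes();
        // The mask row for this shift amount (0xf8 for a shift of 3) clears
        // the bits that leaked in from the lower lane.
        let mask = I8X16_ISHL_MASKS[amt as usize * 16];
        for (lane, shifted) in lanes.iter().zip(shifted) {
            assert_eq!(shifted & mask, lane.wrapping_shl(amt));
        }
    }
}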