//! ISLE integration glue code for x64 lowering.

pub(crate) mod generated_code;
use crate::{
    ir::types,
    ir::AtomicRmwOp,
    isa, isle_common_prelude_methods, isle_lower_prelude_methods,
    machinst::{InputSourceInst, Reg, Writable},
};
use generated_code::{Context, MInst, RegisterClass};
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, MergeableLoadSize};
use crate::ir::LibCall;
use crate::isa::x64::lower::emit_vm_call;
use crate::isa::x64::X64Backend;
use crate::{
    ir::{
        condcodes::{CondCode, FloatCC, IntCC},
        immediates::*,
        types::*,
        BlockCall, Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList,
    },
    isa::{
        unwind::UnwindInst,
        x64::{
            abi::{X64ABIMachineSpec, X64CallSite},
            inst::{args::*, regs, CallInfo, ReturnCallInfo},
        },
    },
    machinst::{
        isle::*, valueregs, ArgPair, InsnInput, InstOutput, Lower, MachAtomicRmwOp, MachInst,
        VCodeConstant, VCodeConstantData,
    },
};
use alloc::vec::Vec;
use regalloc2::PReg;
use smallvec::SmallVec;
use std::boxed::Box;
use std::convert::TryFrom;
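// Type aliases referenced by the ISLE-generated code.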
type BoxCallInfo = Box<CallInfo>;
type BoxReturnCallInfo = Box<ReturnCallInfo>;
type BoxVecMachLabel = Box<SmallVec<[MachLabel; 4]>>;
type MachLabelSlice = [MachLabel];
type VecArgPair = Vec<ArgPair>;
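/// A load that can be sunk into the instruction that uses its result,
/// becoming a memory operand of that instruction.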
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}
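/// The main entry point for lowering an instruction with ISLE.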
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}
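/// The main entry point for lowering a branch with ISLE.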
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets)
}
impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_prelude_caller_methods!(X64ABIMachineSpec, X64CallSite);
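    /// Lower a `return_call_indirect`: a tail call through a function pointer.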
    fn gen_return_call_indirect(
        &mut self,
        callee_sig: SigRef,
        callee: Value,
        args: ValueSlice,
    ) -> InstOutput {
        let caller_conv = isa::CallConv::Tail;
        debug_assert_eq!(
            self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
            caller_conv,
            "Can only do `return_call`s from within a `tail` calling convention function"
        );
        let callee = self.put_in_reg(callee);
        let call_site = X64CallSite::from_ptr(
            self.lower_ctx.sigs(),
            callee_sig,
            callee,
            Opcode::ReturnCallIndirect,
            caller_conv,
            self.backend.flags().clone(),
        );
        call_site.emit_return_call(self.lower_ctx, args);
        InstOutput::new()
    }
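    /// Lower a `return_call`: a tail call to a statically-known callee.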
    fn gen_return_call(
        &mut self,
        callee_sig: SigRef,
        callee: ExternalName,
        distance: RelocDistance,
        args: ValueSlice,
    ) -> InstOutput {
        let caller_conv = isa::CallConv::Tail;
        debug_assert_eq!(
            self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
            caller_conv,
            "Can only do `return_call`s from within a `tail` calling convention function"
        );
        let call_site = X64CallSite::from_func(
            self.lower_ctx.sigs(),
            callee_sig,
            &callee,
            distance,
            caller_conv,
            self.backend.flags().clone(),
        );
        call_site.emit_return_call(self.lower_ctx, args);
        InstOutput::new()
    }
    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }
    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }
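    /// Put `val` into a register, memory, or immediate operand, preferring the
    /// immediate form when the value is a constant that fits in a
    /// sign-extended 32-bit immediate.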
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return imm.to_reg_mem_imm();
            }
        }
        self.put_in_reg_mem(val).into()
    }
    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some(c) = inputs.constant {
            if let Some(imm) = to_simm32(c as i64) {
                return XmmMemImm::new(imm.to_reg_mem_imm()).unwrap();
            }
        }
        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };
        XmmMemImm::new(res).unwrap()
    }
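    /// Put `val` into an XMM register or memory operand; constants are emitted
    /// as 128-bit constant-pool entries and referenced by address.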
    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some(c) = inputs.constant {
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)))
                .unwrap();
        }
        XmmMem::new(self.put_in_reg_mem(val)).unwrap()
    }
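    /// Put `val` into a register or memory operand. Constants are placed in
    /// the constant pool, and a uniquely-used, mergeable load is sunk so that
    /// it becomes a memory operand of the consuming instruction.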
    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);
        if let Some(c) = inputs.constant {
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }
        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }
        RegMem::reg(self.put_in_reg(val))
    }
    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }
    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }
    #[inline]
    fn use_avx(&mut self) -> bool {
        self.backend.x64_flags.use_avx()
    }
    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.use_avx2()
    }
    #[inline]
    fn use_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.use_avx512vl()
    }
    #[inline]
    fn use_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.use_avx512dq()
    }
    #[inline]
    fn use_avx512f(&mut self) -> bool {
        self.backend.x64_flags.use_avx512f()
    }
    #[inline]
    fn use_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.use_avx512bitalg()
    }
    #[inline]
    fn use_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.use_avx512vbmi()
    }
    #[inline]
    fn use_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.use_lzcnt()
    }
    #[inline]
    fn use_bmi1(&mut self) -> bool {
        self.backend.x64_flags.use_bmi1()
    }
    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.use_popcnt()
    }
    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.use_fma()
    }
    #[inline]
    fn use_ssse3(&mut self) -> bool {
        self.backend.x64_flags.use_ssse3()
    }
    #[inline]
    fn use_sse41(&mut self) -> bool {
        self.backend.x64_flags.use_sse41()
    }
    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.use_sse42()
    }
    #[inline]
    fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant = self.lower_ctx.get_constant(inst)?;
        let imm = u8::try_from(constant).ok()?;
        Some(Imm8Reg::Imm8 { imm })
    }
    #[inline]
    fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr {
        let mask = self.shift_mask(ty) as u64;
        Imm8Gpr::new(Imm8Reg::Imm8 {
            imm: (c & mask) as u8,
        })
        .unwrap()
    }
    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());
        (ty.lane_bits() - 1) as u8
    }
    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }
    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let inst = self.lower_ctx.dfg().value_def(val).inst()?;
        let constant: u64 = self.lower_ctx.get_constant(inst)?;
        let constant = constant as i64;
        to_simm32(constant)
    }
    #[inline]
    fn simm32_from_imm64(&mut self, imm: Imm64) -> Option<GprMemImm> {
        to_simm32(imm.bits())
    }
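    /// If `val` is produced by a uniquely-used, mergeable load of at least 32
    /// bits, return it so the load can be sunk into its user.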
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }
    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        let input = self.lower_ctx.get_value_as_source_or_const(val);
        if let InputSourceInst::UniqueUse(inst, 0) = input.inst {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }
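    /// Sink a load found by `sinkable_load`, lowering its address to an amode
    /// usable as a memory operand.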
    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }
    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }
    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }
    #[inline]
    fn nonzero_u64_fits_in_u32(&mut self, x: u64) -> Option<u64> {
        if x != 0 && x < u64::from(u32::MAX) {
            Some(x)
        } else {
            None
        }
    }
    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        0b00_00_00_00 | lane << 4
    }
    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }
    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }
    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }
    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }
    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }
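    /// Constant-pool address of the 16-byte mask applied after lowering an
    /// `ishl.i8x16` by the constant amount `amt` with a 16x8 shift; the mask
    /// clears the bits shifted in across byte-lane boundaries.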
    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }
    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }
    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }
    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }
    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
    }
    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }
    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }
    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }
    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }
    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }
    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::new(r.clone().to_reg_mem().into()).unwrap()
    }
    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }
    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }
    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }
    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::new(rm.clone()).unwrap()
    }
    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::new(rmi.clone()).unwrap()
    }
    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::new(rmi.clone()).unwrap()
    }
    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }
    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }
    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }
    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::new(r).unwrap()
    }
    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::new(r).unwrap()
    }
    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::new(rm.clone()).unwrap()
    }
    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::new(RegMem::reg(r)).unwrap()
    }
    #[inline]
    fn imm8_reg_to_imm8_gpr(&mut self, ir: &Imm8Reg) -> Imm8Gpr {
        Imm8Gpr::new(ir.clone()).unwrap()
    }
    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }
    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }
    #[inline]
    fn gpr_to_imm8_gpr(&mut self, gpr: Gpr) -> Imm8Gpr {
        Imm8Gpr::from(gpr)
    }
    #[inline]
    fn imm8_to_imm8_gpr(&mut self, imm: u8) -> Imm8Gpr {
        Imm8Gpr::new(Imm8Reg::Imm8 { imm }).unwrap()
    }
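    /// The register class used to hold values of type `ty`: GPRs for integer
    /// and reference types (a register pair for `i128`), XMM registers for
    /// floats and 128-bit vectors.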
    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty == F32 || ty == F64 || (ty.is_vector() && ty.bits() == 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }
    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => Some(()),
            types::R32 => panic!("shouldn't have 32-bits refs on x64"),
            _ => None,
        }
    }
    #[inline]
    fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC {
        x.without_equal()
    }
    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }
    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }
    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }
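    /// Compute `offset + zero_extend(constant_value)` (zero-extending from the
    /// width of `extend_from_ty`) and return the sum if it fits in a signed
    /// 32-bit displacement.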
    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }
    #[inline]
    fn amode_offset(&mut self, addr: &Amode, offset: u32) -> Amode {
        addr.offset(offset)
    }
    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }
    #[inline]
    fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp {
        MachAtomicRmwOp::from(*op)
    }
    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }
    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }
    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }
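    /// Emit a call to `libcall` with one register argument, returning its
    /// single result in a fresh temporary. `libcall_2` and `libcall_3` below
    /// are the two- and three-argument variants.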
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv, I64).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();
        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");
        output_reg.to_reg()
    }
    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv, I64).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();
        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a, b],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");
        output_reg.to_reg()
    }
    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let call_conv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
        let ret_ty = libcall.signature(call_conv, I64).returns[0].value_type;
        let output_reg = self.lower_ctx.alloc_tmp(ret_ty).only_reg().unwrap();
        emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            libcall.clone(),
            &[a, b, c],
            &[output_reg],
        )
        .expect("Failed to emit LibCall");
        output_reg.to_reg()
    }
    #[inline]
    fn single_target(&mut self, targets: &MachLabelSlice) -> Option<MachLabel> {
        if targets.len() == 1 {
            Some(targets[0])
        } else {
            None
        }
    }
    #[inline]
    fn two_targets(&mut self, targets: &MachLabelSlice) -> Option<(MachLabel, MachLabel)> {
        if targets.len() == 2 {
            Some((targets[0], targets[1]))
        } else {
            None
        }
    }
    #[inline]
    fn jump_table_targets(
        &mut self,
        targets: &MachLabelSlice,
    ) -> Option<(MachLabel, BoxVecMachLabel)> {
        if targets.is_empty() {
            return None;
        }
        let default_label = targets[0];
        let jt_targets = Box::new(SmallVec::from(&targets[1..]));
        Some((default_label, jt_targets))
    }
    #[inline]
    fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
        targets.len() as u32
    }
    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }
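    /// Build a `pshufb` mask from a shuffle mask whose indices may refer to
    /// either input (0..=31): indices 16..=31 are rebased to 0..=15, and
    /// indices above 31 become 0x80 so `pshufb` writes zero for that lane.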
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }
    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }
    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }
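    /// For a shuffle mask containing indices above 31 (which must produce zero
    /// lanes), return the `pshufb` permutation constant together with a
    /// lane-select constant: 0xff keeps the permuted byte, 0x00 zeroes it.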
    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }
        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();
        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }
    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }
    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }
    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }
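    /// If the 32x4 shuffle selects only lanes of the first operand, return the
    /// corresponding `pshufd` immediate.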
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
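    /// If the 32x4 shuffle selects only lanes of the second operand, return
    /// the corresponding `pshufd` immediate (indices rebased by -4).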
    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
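    /// If the shuffle fills the two low lanes of the result from the first
    /// operand and the two high lanes from the second, return the `shufps`
    /// immediate.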
    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
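    /// If the 16x8 shuffle permutes only the low four lanes of the first
    /// operand and leaves the high four lanes in place, return the `pshuflw`
    /// immediate.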
    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }
    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }
    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }
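    /// If the shuffle mask selects a contiguous, ascending run of bytes,
    /// return its starting index for use as a `palignr` immediate.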
    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }
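    /// If every 16-bit lane comes from the same lane position of either the
    /// first or second operand, return the `pblendw` immediate (a set bit
    /// selects the second operand's lane).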
    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 {
                    Some(0)
                } else {
                    Some(1 << c)
                }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }
    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::new(RegMemImm::imm(imm)).unwrap()
    }
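    /// Emit a 128-bit constant that is all ones except for the byte at
    /// `hole_idx`, which is zero.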
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = u128::MAX;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }
}
impl IsleContext<'_, '_, MInst, X64Backend> {
    isle_prelude_method_helpers!(X64CallSite);
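    /// Load 128 bits from memory into a fresh XMM temporary with an unaligned
    /// `movdqu`.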
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::XmmUnaryRmRUnaligned {
            op: SseOpcode::Movdqu,
            src: XmmMem::new(RegMem::mem(addr)).unwrap(),
            dst: Writable::from_reg(Xmm::new(tmp.to_reg()).unwrap()),
        });
        Xmm::new(tmp.to_reg()).unwrap()
    }
}
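// x86 has no 8x16 shift instructions, so `ishl.i8x16` and `ushr.i8x16` are
// lowered as a 16x8 shift followed by a mask that clears the bits shifted in
// across byte-lane boundaries. Row `N` of each table below is the 16-byte mask
// for a shift amount of `N`.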
#[rustfmt::skip] const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];
#[rustfmt::skip] const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];
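/// Convert `constant` into a 32-bit immediate operand if it is representable
/// as a sign-extended 32-bit value.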
#[inline]
fn to_simm32(constant: i64) -> Option<GprMemImm> {
    if constant == ((constant << 32) >> 32) {
        Some(
            GprMemImm::new(RegMemImm::Imm {
                simm32: constant as u32,
            })
            .unwrap(),
        )
    } else {
        None
    }
}