use super::{
abi::X64ABI,
address::Address,
asm::{Assembler, PatchableAddToReg, VcmpKind, VcvtKind, VroundMode},
regs::{self, rbp, rsp},
};
use anyhow::{Result, anyhow, bail};
use crate::masm::{
DivKind, Extend, ExtendKind, ExtractLaneKind, FloatCmpKind, Imm as I, IntCmpKind, LaneSelector,
LoadKind, MacroAssembler as Masm, MulWideKind, OperandSize, RegImm, RemKind, ReplaceLaneKind,
RmwOp, RoundingMode, ShiftKind, SplatKind, StoreKind, TRUSTED_FLAGS, TrapCode, TruncKind,
UNTRUSTED_FLAGS, V128AbsKind, V128AddKind, V128ConvertKind, V128ExtAddKind, V128ExtMulKind,
V128ExtendKind, V128MaxKind, V128MinKind, V128MulKind, V128NarrowKind, V128NegKind,
V128SubKind, V128TruncKind, VectorCompareKind, VectorEqualityKind, Zero,
};
use crate::{
abi::{self, LocalSlot, align_to, calculate_frame_adjustment},
codegen::{CodeGenContext, CodeGenError, Emission, FuncEnv, ptr_type_from_ptr_size},
stack::{TypedReg, Val},
};
use crate::{
abi::{ABI, vmctx},
masm::{SPOffset, StackSlot},
};
use crate::{
isa::{
CallingConvention,
reg::{Reg, RegClass, WritableReg, writable},
},
masm::CalleeKind,
};
use cranelift_codegen::{
Final, MachBufferFinalized, MachLabel,
binemit::CodeOffset,
ir::{MemFlags, RelSourceLoc, SourceLoc},
isa::{
unwind::UnwindInst,
x64::{
AtomicRmwSeqOp,
args::{Avx512Opcode, AvxOpcode, CC, FenceKind},
settings as x64_settings,
},
},
settings,
};
use wasmtime_cranelift::TRAP_UNREACHABLE;
use wasmtime_environ::{PtrSize, WasmValType};
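// Masks for `i8x16.shl`. x64 has no 8-bit lane shifts, so the shift is done
// on 16-bit lanes instead, and the 16-byte row at offset `shift_amount * 16`
// clears the bits that bled in from the neighboring byte: shifting left by k
// keeps only the top `8 - k` bits of each byte (row k is `0xff << k`).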
#[rustfmt::skip] const I8X16_ISHL_MASKS: [u8; 128] = [
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];
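// Masks for `i8x16.shr_u`, indexed the same way as `I8X16_ISHL_MASKS`: after
// a 16-bit logical right shift by k, row k (`0xff >> k`) masks off the high
// bits that crossed over from the upper byte of each pair.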
#[rustfmt::skip] const I8X16_USHR_MASKS: [u8; 128] = [
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];
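/// x64-specific implementation of Winch's `MacroAssembler`.
///
/// Tracks the current (`sp_offset`) and maximum (`sp_max`) stack pointer
/// offsets for the frame; the maximum is patched into the stack check via
/// `stack_max_use_add` when the buffer is finalized.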
pub(crate) struct MacroAssembler {
sp_offset: u32,
sp_max: u32,
stack_max_use_add: Option<PatchableAddToReg>,
asm: Assembler,
flags: x64_settings::Flags,
shared_flags: settings::Flags,
ptr_size: OperandSize,
}
impl Masm for MacroAssembler {
type Address = Address;
type Ptr = u8;
type ABI = X64ABI;
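    // Standard prologue: `push rbp; mov rbp, rsp`, emitting unwind info for
    // the pushed frame registers when unwind info is enabled.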
fn frame_setup(&mut self) -> Result<()> {
let frame_pointer = rbp();
let stack_pointer = rsp();
self.asm.push_r(frame_pointer);
if self.shared_flags.unwind_info() {
self.asm.unwind_inst(UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: Self::ABI::arg_base_offset().into(),
})
}
self.asm
.mov_rr(stack_pointer, writable!(frame_pointer), OperandSize::S64);
Ok(())
}
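    // Load the stack limit from the `VMStoreContext` reachable through
    // `vmctx`, fold in this function's maximum stack use (patched in later
    // via `add_stack_max`), and trap with `STACK_OVERFLOW` if the adjusted
    // limit exceeds `rsp`.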
fn check_stack(&mut self, vmctx: Reg) -> Result<()> {
let ptr_size: u8 = self.ptr_size.bytes().try_into().unwrap();
let scratch = regs::scratch();
self.load_ptr(
self.address_at_reg(vmctx, ptr_size.vmcontext_store_context().into())?,
writable!(scratch),
)?;
self.load_ptr(
Address::offset(scratch, ptr_size.vmstore_context_stack_limit().into()),
writable!(scratch),
)?;
self.add_stack_max(scratch);
self.asm.cmp_rr(scratch, regs::rsp(), self.ptr_size);
self.asm.trapif(IntCmpKind::GtU, TrapCode::STACK_OVERFLOW);
if self.shared_flags.unwind_info() {
self.asm.unwind_inst(UnwindInst::DefineNewFrame {
offset_upward_to_caller_sp: Self::ABI::arg_base_offset().into(),
offset_downward_to_clobbers: 0,
})
}
Ok(())
}
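    // 64-bit GPRs use a native `push`; 32-bit GPRs and float/vector registers
    // reserve stack space explicitly and store into it, since `push` always
    // writes a full word.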
fn push(&mut self, reg: Reg, size: OperandSize) -> Result<StackSlot> {
let bytes = match (reg.class(), size) {
(RegClass::Int, OperandSize::S64) => {
let word_bytes = <Self::ABI as ABI>::word_bytes() as u32;
self.asm.push_r(reg);
self.increment_sp(word_bytes);
word_bytes
}
(RegClass::Int, OperandSize::S32) => {
let bytes = size.bytes();
self.reserve_stack(bytes)?;
let sp_offset = SPOffset::from_u32(self.sp_offset);
self.asm
.mov_rm(reg, &self.address_from_sp(sp_offset)?, size, TRUSTED_FLAGS);
bytes
}
(RegClass::Float, _) => {
let bytes = size.bytes();
self.reserve_stack(bytes)?;
let sp_offset = SPOffset::from_u32(self.sp_offset);
self.asm
.xmm_mov_rm(reg, &self.address_from_sp(sp_offset)?, size, TRUSTED_FLAGS);
bytes
}
_ => unreachable!(),
};
Ok(StackSlot {
offset: SPOffset::from_u32(self.sp_offset),
size: bytes,
})
}
fn reserve_stack(&mut self, bytes: u32) -> Result<()> {
if bytes == 0 {
return Ok(());
}
self.asm
.sub_ir(bytes as i32, writable!(rsp()), OperandSize::S64);
self.increment_sp(bytes);
Ok(())
}
fn free_stack(&mut self, bytes: u32) -> Result<()> {
if bytes == 0 {
return Ok(());
}
self.asm
.add_ir(bytes as i32, writable!(rsp()), OperandSize::S64);
self.decrement_sp(bytes);
Ok(())
}
fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()> {
self.sp_offset = offset.as_u32();
Ok(())
}
fn local_address(&mut self, local: &LocalSlot) -> Result<Address> {
let (reg, offset) = if local.addressed_from_sp() {
let offset = self
.sp_offset
.checked_sub(local.offset)
.ok_or_else(|| CodeGenError::invalid_local_offset())?;
(rsp(), offset)
} else {
(rbp(), local.offset)
};
Ok(Address::offset(reg, offset))
}
fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address> {
Ok(Address::offset(
regs::rsp(),
self.sp_offset - offset.as_u32(),
))
}
fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address> {
Ok(Address::offset(regs::rsp(), offset.as_u32()))
}
fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address> {
Ok(Address::offset(vmctx!(Self), offset))
}
fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()> {
self.store(src.into(), dst, self.ptr_size)
}
fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) -> Result<()> {
self.store_impl(src, dst, size, TRUSTED_FLAGS)
}
fn wasm_store(&mut self, src: Reg, dst: Self::Address, kind: StoreKind) -> Result<()> {
match kind {
StoreKind::Operand(size) => {
self.store_impl(src.into(), dst, size, UNTRUSTED_FLAGS)?;
}
StoreKind::Atomic(size) => {
if size == OperandSize::S128 {
bail!(CodeGenError::unexpected_operand_size());
}
self.store_impl(src.into(), dst, size, UNTRUSTED_FLAGS)?;
self.asm.fence(FenceKind::MFence);
}
StoreKind::VectorLane(LaneSelector { lane, size }) => {
self.ensure_has_avx()?;
self.asm
.xmm_vpextr_rm(&dst, src, lane, size, UNTRUSTED_FLAGS)?;
}
}
Ok(())
}
fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()> {
let current_sp = SPOffset::from_u32(self.sp_offset);
let _ = match (dst.to_reg().class(), size) {
(RegClass::Int, OperandSize::S32) => {
let addr = self.address_from_sp(current_sp)?;
self.asm.movzx_mr(
&addr,
dst,
size.extend_to::<Zero>(OperandSize::S64),
TRUSTED_FLAGS,
);
self.free_stack(size.bytes())?;
}
(RegClass::Int, OperandSize::S64) => {
self.asm.pop_r(dst);
self.decrement_sp(<Self::ABI as ABI>::word_bytes() as u32);
}
(RegClass::Float, _) | (RegClass::Vector, _) => {
let addr = self.address_from_sp(current_sp)?;
self.asm.xmm_mov_mr(&addr, dst, size, TRUSTED_FLAGS);
self.free_stack(size.bytes())?;
}
_ => bail!(CodeGenError::invalid_operand_combination()),
};
Ok(())
}
fn call(
&mut self,
stack_args_size: u32,
mut load_callee: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
) -> Result<u32> {
let alignment: u32 = <Self::ABI as abi::ABI>::call_stack_align().into();
let addend: u32 = <Self::ABI as abi::ABI>::initial_frame_size().into();
let delta = calculate_frame_adjustment(self.sp_offset()?.as_u32(), addend, alignment);
let aligned_args_size = align_to(stack_args_size, alignment);
let total_stack = delta + aligned_args_size;
self.reserve_stack(total_stack)?;
let (callee, cc) = load_callee(self)?;
match callee {
CalleeKind::Indirect(reg) => self.asm.call_with_reg(cc, reg),
CalleeKind::Direct(idx) => self.asm.call_with_name(cc, idx),
};
Ok(total_stack)
}
fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()> {
self.load(src, dst, self.ptr_size)
}
fn compute_addr(
&mut self,
src: Self::Address,
dst: WritableReg,
size: OperandSize,
) -> Result<()> {
self.asm.lea(&src, dst, size);
Ok(())
}
fn load(&mut self, src: Address, dst: WritableReg, size: OperandSize) -> Result<()> {
self.load_impl(src, dst, size, TRUSTED_FLAGS)
}
fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()> {
let size = kind.derive_operand_size();
match kind {
LoadKind::ScalarExtend(ext) => match ext {
ExtendKind::Signed(ext) => {
self.asm.movsx_mr(&src, dst, ext, UNTRUSTED_FLAGS);
}
ExtendKind::Unsigned(_) => self.load_impl(src, dst, size, UNTRUSTED_FLAGS)?,
},
LoadKind::Operand(_) | LoadKind::Atomic(_, _) => {
if kind.is_atomic() && size == OperandSize::S128 {
bail!(CodeGenError::unexpected_operand_size());
}
self.load_impl(src, dst, size, UNTRUSTED_FLAGS)?;
}
LoadKind::VectorExtend(ext) => {
self.ensure_has_avx()?;
self.asm
.xmm_vpmov_mr(&src, dst, ext.into(), UNTRUSTED_FLAGS)
}
LoadKind::Splat(_) => {
self.ensure_has_avx()?;
if size == OperandSize::S64 {
self.asm
.xmm_mov_mr(&src, dst, OperandSize::S64, UNTRUSTED_FLAGS);
self.asm.xmm_vpshuf_rr(
dst.to_reg(),
dst,
Self::vpshuf_mask_for_64_bit_splats(),
OperandSize::S32,
);
} else {
self.asm
.xmm_vpbroadcast_mr(&src, dst, size, UNTRUSTED_FLAGS);
}
}
LoadKind::VectorLane(LaneSelector { lane, size }) => {
self.ensure_has_avx()?;
let byte_tmp = regs::scratch();
self.load_impl(src, writable!(byte_tmp), size, UNTRUSTED_FLAGS)?;
self.asm
.xmm_vpinsr_rrr(dst, dst.to_reg(), byte_tmp, lane, size);
}
LoadKind::VectorZero(size) => {
self.ensure_has_avx()?;
let scratch = regs::scratch();
self.load_impl(src, writable!(scratch), size, UNTRUSTED_FLAGS)?;
self.asm.avx_gpr_to_xmm(scratch, dst, size);
}
}
Ok(())
}
fn sp_offset(&self) -> Result<SPOffset> {
Ok(SPOffset::from_u32(self.sp_offset))
}
fn zero(&mut self, reg: WritableReg) -> Result<()> {
self.asm.xor_rr(
reg.to_reg(),
reg,
OperandSize::from_bytes(<Self::ABI>::word_bytes()),
);
Ok(())
}
fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()> {
match (src, dst.to_reg()) {
(RegImm::Reg(src), dst_reg) => match (src.class(), dst_reg.class()) {
(RegClass::Int, RegClass::Int) => Ok(self.asm.mov_rr(src, dst, size)),
(RegClass::Float, RegClass::Float) => Ok(self.asm.xmm_mov_rr(src, dst, size)),
_ => bail!(CodeGenError::invalid_operand_combination()),
},
(RegImm::Imm(imm), _) => self.load_constant(&imm, dst, size),
}
}
fn cmov(
&mut self,
dst: WritableReg,
src: Reg,
cc: IntCmpKind,
size: OperandSize,
) -> Result<()> {
match (src.class(), dst.to_reg().class()) {
(RegClass::Int, RegClass::Int) => Ok(self.asm.cmov(src, dst, cc, size)),
(RegClass::Float, RegClass::Float) => Ok(self.asm.xmm_cmov(src, dst, cc, size)),
_ => Err(anyhow!(CodeGenError::invalid_operand_combination())),
}
}
fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), _) => {
if let Some(v) = imm.to_i32() {
self.asm.add_ir(v, dst, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.add_rr(scratch, dst, size);
}
}
(RegImm::Reg(src), dst) => {
self.asm.add_rr(src, dst, size);
}
}
Ok(())
}
fn checked_uadd(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: RegImm,
size: OperandSize,
trap: TrapCode,
) -> Result<()> {
self.add(dst, lhs, rhs, size)?;
self.asm.trapif(CC::B, trap);
Ok(())
}
fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), reg) => {
if let Some(v) = imm.to_i32() {
self.asm.sub_ir(v, reg, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.sub_rr(scratch, reg, size);
}
}
(RegImm::Reg(src), dst) => {
self.asm.sub_rr(src, dst, size);
}
}
Ok(())
}
fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), _) => {
if let Some(v) = imm.to_i32() {
self.asm.mul_ir(v, dst, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.mul_rr(scratch, dst, size);
}
}
(RegImm::Reg(src), dst) => {
self.asm.mul_rr(src, dst, size);
}
}
Ok(())
}
fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_add_rr(rhs, dst, size);
Ok(())
}
fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_sub_rr(rhs, dst, size);
Ok(())
}
fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_mul_rr(rhs, dst, size);
Ok(())
}
fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_div_rr(rhs, dst, size);
Ok(())
}
fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_min_seq(rhs, dst, size);
Ok(())
}
fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.xmm_max_seq(rhs, dst, size);
Ok(())
}
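    // copysign(lhs, rhs) = (lhs & !sign_mask) | (rhs & sign_mask): isolate
    // the sign bit of `rhs`, clear the sign bit of `lhs` (already in `dst`),
    // and OR the two halves back together.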
fn float_copysign(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
size: OperandSize,
) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
let scratch_gpr = regs::scratch();
let scratch_xmm = regs::scratch_xmm();
let sign_mask = match size {
OperandSize::S32 => I::I32(0x80000000),
OperandSize::S64 => I::I64(0x8000000000000000),
OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => {
bail!(CodeGenError::unexpected_operand_size())
}
};
self.load_constant(&sign_mask, writable!(scratch_gpr), size)?;
self.asm
.gpr_to_xmm(scratch_gpr, writable!(scratch_xmm), size);
self.asm.xmm_and_rr(scratch_xmm, writable!(rhs), size);
self.asm
.xmm_andn_rr(dst.to_reg(), writable!(scratch_xmm), size);
self.asm.xmm_mov_rr(scratch_xmm, dst, size);
self.asm.xmm_or_rr(rhs, dst, size);
Ok(())
}
fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()> {
debug_assert_eq!(dst.to_reg().class(), RegClass::Float);
let mask = match size {
OperandSize::S32 => I::I32(0x80000000),
OperandSize::S64 => I::I64(0x8000000000000000),
OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => {
bail!(CodeGenError::unexpected_operand_size())
}
};
let scratch_gpr = regs::scratch();
self.load_constant(&mask, writable!(scratch_gpr), size)?;
let scratch_xmm = regs::scratch_xmm();
self.asm
.gpr_to_xmm(scratch_gpr, writable!(scratch_xmm), size);
self.asm.xmm_xor_rr(scratch_xmm, dst, size);
Ok(())
}
fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()> {
debug_assert_eq!(dst.to_reg().class(), RegClass::Float);
let mask = match size {
OperandSize::S32 => I::I32(0x7fffffff),
OperandSize::S64 => I::I64(0x7fffffffffffffff),
OperandSize::S128 | OperandSize::S16 | OperandSize::S8 => {
bail!(CodeGenError::unexpected_operand_size())
}
};
let scratch_gpr = regs::scratch();
self.load_constant(&mask, writable!(scratch_gpr), size)?;
let scratch_xmm = regs::scratch_xmm();
self.asm
.gpr_to_xmm(scratch_gpr, writable!(scratch_xmm), size);
self.asm.xmm_and_rr(scratch_xmm, dst, size);
Ok(())
}
fn float_round<
F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
>(
&mut self,
mode: RoundingMode,
env: &mut FuncEnv<Self::Ptr>,
context: &mut CodeGenContext<Emission>,
size: OperandSize,
mut fallback: F,
) -> Result<()> {
if self.flags.has_sse41() {
let src = context.pop_to_reg(self, None)?;
self.asm
.xmm_rounds_rr(src.into(), writable!(src.into()), mode, size);
context.stack.push(src.into());
Ok(())
} else {
fallback(env, context, self)
}
}
fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()> {
self.asm.sqrt(src, dst, size);
Ok(())
}
fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), _) => {
if let Some(v) = imm.to_i32() {
self.asm.and_ir(v, dst, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.and_rr(scratch, dst, size);
}
}
(RegImm::Reg(src), dst) => {
self.asm.and_rr(src, dst, size);
}
}
Ok(())
}
fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), _) => {
if let Some(v) = imm.to_i32() {
self.asm.or_ir(v, dst, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.or_rr(scratch, dst, size);
}
}
(RegImm::Reg(src), dst) => {
self.asm.or_rr(src, dst, size);
}
}
Ok(())
}
fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
match (rhs, dst) {
(RegImm::Imm(imm), _) => {
if let Some(v) = imm.to_i32() {
self.asm.xor_ir(v, dst, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.xor_rr(scratch, dst, size);
}
}
(RegImm::Reg(src), _) => {
self.asm.xor_rr(src, dst, size);
}
}
Ok(())
}
fn shift_ir(
&mut self,
dst: WritableReg,
imm: u64,
lhs: Reg,
kind: ShiftKind,
size: OperandSize,
) -> Result<()> {
Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?;
self.asm.shift_ir(imm as u8, dst, kind, size);
Ok(())
}
fn shift(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: ShiftKind,
size: OperandSize,
) -> Result<()> {
let src = context.pop_to_reg(self, Some(regs::rcx()))?;
let dst = context.pop_to_reg(self, None)?;
self.asm
.shift_rr(src.into(), writable!(dst.into()), kind, size);
context.free_reg(src);
context.stack.push(dst.into());
Ok(())
}
fn div(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: DivKind,
size: OperandSize,
) -> Result<()> {
let rdx = context.reg(regs::rdx(), self)?;
let rax = context.reg(regs::rax(), self)?;
let divisor = context.pop_to_reg(self, None)?;
context.free_reg(rax);
let rax = context.pop_to_reg(self, Some(rax))?;
self.asm.div(divisor.into(), (rax.into(), rdx), kind, size);
context.free_reg(divisor);
context.free_reg(rdx);
context.stack.push(rax.into());
Ok(())
}
fn rem(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: RemKind,
size: OperandSize,
) -> Result<()> {
let rdx = context.reg(regs::rdx(), self)?;
let rax = context.reg(regs::rax(), self)?;
let divisor = context.pop_to_reg(self, None)?;
context.free_reg(rax);
let rax = context.pop_to_reg(self, Some(rax))?;
self.asm.rem(divisor.reg, (rax.into(), rdx), kind, size);
context.free_reg(divisor);
context.free_reg(rax);
context.stack.push(Val::reg(rdx, divisor.ty));
Ok(())
}
fn frame_restore(&mut self) -> Result<()> {
debug_assert_eq!(self.sp_offset, 0);
self.asm.pop_r(writable!(rbp()));
self.asm.ret();
Ok(())
}
fn finalize(mut self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>> {
if let Some(patch) = self.stack_max_use_add {
patch.finalize(i32::try_from(self.sp_max).unwrap(), self.asm.buffer_mut());
}
Ok(self.asm.finalize(base))
}
fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address> {
Ok(Address::offset(reg, offset))
}
fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()> {
match src2 {
RegImm::Imm(imm) => {
if let Some(v) = imm.to_i32() {
self.asm.cmp_ir(src1, v, size);
} else {
let scratch = regs::scratch();
self.load_constant(&imm, writable!(scratch), size)?;
self.asm.cmp_rr(src1, scratch, size);
}
}
RegImm::Reg(src2) => {
self.asm.cmp_rr(src1, src2, size);
}
}
Ok(())
}
fn cmp_with_set(
&mut self,
dst: WritableReg,
src: RegImm,
kind: IntCmpKind,
size: OperandSize,
) -> Result<()> {
self.cmp(dst.to_reg(), src, size)?;
self.asm.setcc(kind, dst);
Ok(())
}
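    // `ucomis` sets PF on an unordered (NaN) comparison, so unsigned
    // condition codes are used, with operands swapped for lt/le. For
    // eq/gt/ge a NaN operand must yield false (AND with `setnp`); for ne it
    // must yield true (OR with `setp`).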
fn float_cmp_with_set(
&mut self,
dst: WritableReg,
src1: Reg,
src2: Reg,
kind: FloatCmpKind,
size: OperandSize,
) -> Result<()> {
let (src1, src2, set_kind) = match kind {
FloatCmpKind::Eq => (src1, src2, IntCmpKind::Eq),
FloatCmpKind::Ne => (src1, src2, IntCmpKind::Ne),
FloatCmpKind::Gt => (src1, src2, IntCmpKind::GtU),
FloatCmpKind::Ge => (src1, src2, IntCmpKind::GeU),
FloatCmpKind::Lt => (src2, src1, IntCmpKind::GtU),
FloatCmpKind::Le => (src2, src1, IntCmpKind::GeU),
};
self.asm.ucomis(src1, src2, size);
self.asm.setcc(set_kind, dst);
let _ = match kind {
FloatCmpKind::Eq | FloatCmpKind::Gt | FloatCmpKind::Ge => {
let scratch = regs::scratch();
self.asm.setnp(writable!(scratch));
self.asm.and_rr(scratch, dst, size);
}
FloatCmpKind::Ne => {
let scratch = regs::scratch();
self.asm.setp(writable!(scratch));
self.asm.or_rr(scratch, dst, size);
}
FloatCmpKind::Lt | FloatCmpKind::Le => (),
};
Ok(())
}
fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()> {
if self.flags.has_lzcnt() {
self.asm.lzcnt(src, dst, size);
} else {
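            // No `lzcnt`; compute
            //   dst = size.num_bits() - bsr(src) - is_not_zero
            //       = size.num_bits() + -bsr(src) - is_not_zero
            // where `bsr` yields the index of the most significant set bit
            // and `is_not_zero` corrects the off-by-one for non-zero inputs.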
let scratch = regs::scratch();
self.asm.bsr(src, dst, size);
self.asm.setcc(IntCmpKind::Ne, writable!(scratch));
self.asm.neg(dst.to_reg(), dst, size);
self.asm.add_ir(size.num_bits() as i32, dst, size);
self.asm.sub_rr(scratch, dst, size);
}
Ok(())
}
fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()> {
if self.flags.has_bmi1() {
self.asm.tzcnt(src, dst, size);
} else {
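            // No `tzcnt`; compute
            //   dst = bsf(src) + (is_zero << size.log2())
            // `bsf` yields the index of the least significant set bit for
            // non-zero inputs; the `is_zero` term supplies `num_bits` when
            // the source is zero, matching the `ctz` definition.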
let scratch = regs::scratch();
self.asm.bsf(src, dst, size);
self.asm.setcc(IntCmpKind::Eq, writable!(scratch));
self.asm
.shift_ir(size.log2(), writable!(scratch), ShiftKind::Shl, size);
self.asm.add_rr(scratch, dst, size);
}
Ok(())
}
fn get_label(&mut self) -> Result<MachLabel> {
let buffer = self.asm.buffer_mut();
Ok(buffer.get_label())
}
fn bind(&mut self, label: MachLabel) -> Result<()> {
let buffer = self.asm.buffer_mut();
buffer.bind_label(label, &mut Default::default());
Ok(())
}
fn branch(
&mut self,
kind: IntCmpKind,
lhs: Reg,
rhs: RegImm,
taken: MachLabel,
size: OperandSize,
) -> Result<()> {
use IntCmpKind::*;
match &(lhs, rhs) {
(rlhs, RegImm::Reg(rrhs)) => {
if (kind == Eq || kind == Ne) && (rlhs == rrhs) {
self.asm.test_rr(*rlhs, *rrhs, size);
} else {
self.cmp(lhs, rhs, size)?;
}
}
_ => self.cmp(lhs, rhs, size)?,
}
self.asm.jmp_if(kind, taken);
Ok(())
}
fn jmp(&mut self, target: MachLabel) -> Result<()> {
self.asm.jmp(target);
Ok(())
}
fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()> {
let src = context.pop_to_reg(self, None)?;
if self.flags.has_popcnt() && self.flags.has_sse42() {
self.asm.popcnt(src.into(), writable!(src.into()), size);
context.stack.push(src.into());
Ok(())
} else {
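            // No hardware `popcnt`: fall back to the classic SWAR
            // bit-counting sequence (Hacker's Delight, fig. 5-2):
            //   v = v - ((v >> 1) & 0x55..);               // 2-bit sums
            //   v = (v & 0x33..) + ((v >> 2) & 0x33..);    // 4-bit sums
            //   v = (v + (v >> 4)) & 0x0f..;               // byte sums
            //   v = (v * 0x01..) >> (width - 8);           // total in top byte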
let tmp = writable!(context.any_gpr(self)?);
let dst = writable!(src.into());
let (masks, shift_amt) = match size {
OperandSize::S64 => (
[
0x5555555555555555,
0x3333333333333333,
0x0f0f0f0f0f0f0f0f,
0x0101010101010101,
],
56u8,
),
OperandSize::S32 => (
[0x55555555i64, 0x33333333i64, 0x0f0f0f0fi64, 0x01010101i64],
24u8,
),
_ => bail!(CodeGenError::unexpected_operand_size()),
};
self.asm.mov_rr(src.into(), tmp, size);
self.asm.shift_ir(1u8, dst, ShiftKind::ShrU, size);
let lhs = dst.to_reg();
self.and(writable!(lhs), lhs, RegImm::i64(masks[0]), size)?;
self.asm.sub_rr(dst.to_reg(), tmp, size);
self.asm.mov_rr(tmp.to_reg(), dst, size);
let scratch = regs::scratch();
self.load_constant(&I::i64(masks[1]), writable!(scratch), size)?;
self.asm.and_rr(scratch, dst, size);
self.asm.shift_ir(2u8, tmp, ShiftKind::ShrU, size);
self.asm.and_rr(scratch, tmp, size);
self.asm.add_rr(dst.to_reg(), tmp, size);
self.asm.mov_rr(tmp.to_reg(), dst, size);
self.asm.shift_ir(4u8, dst, ShiftKind::ShrU, size);
self.asm.add_rr(tmp.to_reg(), dst, size);
let lhs = dst.to_reg();
self.and(writable!(lhs), lhs, RegImm::i64(masks[2]), size)?;
let lhs = dst.to_reg();
self.mul(writable!(lhs), lhs, RegImm::i64(masks[3]), size)?;
self.asm.shift_ir(shift_amt, dst, ShiftKind::ShrU, size);
context.stack.push(src.into());
context.free_reg(tmp.to_reg());
Ok(())
}
}
fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()> {
self.asm.mov_rr(src, dst, OperandSize::S32);
Ok(())
}
fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()> {
match kind {
ExtendKind::Signed(ext) => {
self.asm.movsx_rr(src, dst, ext);
}
ExtendKind::Unsigned(ext) => {
self.asm.movzx_rr(src, dst, ext);
}
}
Ok(())
}
fn signed_truncate(
&mut self,
dst: WritableReg,
src: Reg,
src_size: OperandSize,
dst_size: OperandSize,
kind: TruncKind,
) -> Result<()> {
self.asm.cvt_float_to_sint_seq(
src,
dst,
regs::scratch(),
regs::scratch_xmm(),
src_size,
dst_size,
kind.is_checked(),
);
Ok(())
}
fn unsigned_truncate(
&mut self,
ctx: &mut CodeGenContext<Emission>,
src_size: OperandSize,
dst_size: OperandSize,
kind: TruncKind,
) -> Result<()> {
let dst_ty = match dst_size {
OperandSize::S32 => WasmValType::I32,
OperandSize::S64 => WasmValType::I64,
_ => bail!(CodeGenError::unexpected_operand_size()),
};
ctx.convert_op_with_tmp_reg(
self,
dst_ty,
RegClass::Float,
|masm, dst, src, tmp_fpr, dst_size| {
masm.asm.cvt_float_to_uint_seq(
src,
writable!(dst),
regs::scratch(),
regs::scratch_xmm(),
tmp_fpr,
src_size,
dst_size,
kind.is_checked(),
);
Ok(())
},
)
}
fn signed_convert(
&mut self,
dst: WritableReg,
src: Reg,
src_size: OperandSize,
dst_size: OperandSize,
) -> Result<()> {
self.asm.cvt_sint_to_float(src, dst, src_size, dst_size);
Ok(())
}
fn unsigned_convert(
&mut self,
dst: WritableReg,
src: Reg,
tmp_gpr: Reg,
src_size: OperandSize,
dst_size: OperandSize,
) -> Result<()> {
if let OperandSize::S32 = src_size {
self.extend(
writable!(src),
src,
ExtendKind::Unsigned(Extend::I64Extend32),
)?;
}
self.asm
.cvt_uint64_to_float_seq(src, dst, regs::scratch(), tmp_gpr, dst_size);
Ok(())
}
fn reinterpret_float_as_int(
&mut self,
dst: WritableReg,
src: Reg,
size: OperandSize,
) -> Result<()> {
self.asm.xmm_to_gpr(src, dst, size);
Ok(())
}
fn reinterpret_int_as_float(
&mut self,
dst: WritableReg,
src: Reg,
size: OperandSize,
) -> Result<()> {
self.asm.gpr_to_xmm(src, dst, size);
Ok(())
}
fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()> {
self.asm
.cvt_float_to_float(src, dst, OperandSize::S64, OperandSize::S32);
Ok(())
}
fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()> {
self.asm
.cvt_float_to_float(src, dst, OperandSize::S32, OperandSize::S64);
Ok(())
}
fn unreachable(&mut self) -> Result<()> {
self.asm.trap(TRAP_UNREACHABLE);
Ok(())
}
fn trap(&mut self, code: TrapCode) -> Result<()> {
self.asm.trap(code);
Ok(())
}
fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()> {
self.asm.trapif(cc, code);
Ok(())
}
fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()> {
self.asm.test_rr(src, src, self.ptr_size);
self.asm.trapif(IntCmpKind::Eq, code);
Ok(())
}
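    // Clamp the index to the last (default) target so any out-of-range index
    // branches to the default label, then emit the jump table itself.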
fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()> {
debug_assert!(targets.len() >= 1);
let default_index = targets.len() - 1;
let max = default_index;
let size = OperandSize::S32;
self.asm.mov_ir(max as u64, writable!(tmp), size);
self.asm.cmp_rr(tmp, index, size);
self.asm.cmov(tmp, writable!(index), IntCmpKind::LtU, size);
let default = targets[default_index];
let rest = &targets[0..default_index];
let tmp1 = regs::scratch();
self.asm.jmp_table(rest.into(), default, index, tmp1, tmp);
Ok(())
}
fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)> {
Ok(self.asm.buffer_mut().start_srcloc(loc))
}
fn end_source_loc(&mut self) -> Result<()> {
self.asm.buffer_mut().end_srcloc();
Ok(())
}
fn current_code_offset(&self) -> Result<CodeOffset> {
Ok(self.asm.buffer().cur_offset())
}
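    // 128-bit addition over register pairs: add the low halves, then add the
    // high halves including the carry (`add` + `adc`).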
fn add128(
&mut self,
dst_lo: WritableReg,
dst_hi: WritableReg,
lhs_lo: Reg,
lhs_hi: Reg,
rhs_lo: Reg,
rhs_hi: Reg,
) -> Result<()> {
Self::ensure_two_argument_form(&dst_lo.to_reg(), &lhs_lo)?;
Self::ensure_two_argument_form(&dst_hi.to_reg(), &lhs_hi)?;
self.asm.add_rr(rhs_lo, dst_lo, OperandSize::S64);
self.asm.adc_rr(rhs_hi, dst_hi, OperandSize::S64);
Ok(())
}
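    // 128-bit subtraction mirrors `add128`, using `sub` + `sbb` to propagate
    // the borrow from the low half into the high half.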
fn sub128(
&mut self,
dst_lo: WritableReg,
dst_hi: WritableReg,
lhs_lo: Reg,
lhs_hi: Reg,
rhs_lo: Reg,
rhs_hi: Reg,
) -> Result<()> {
Self::ensure_two_argument_form(&dst_lo.to_reg(), &lhs_lo)?;
Self::ensure_two_argument_form(&dst_hi.to_reg(), &lhs_hi)?;
self.asm.sub_rr(rhs_lo, dst_lo, OperandSize::S64);
self.asm.sbb_rr(rhs_hi, dst_hi, OperandSize::S64);
Ok(())
}
fn mul_wide(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: MulWideKind,
) -> Result<()> {
let rax = context.reg(regs::rax(), self)?;
let rdx = context.reg(regs::rdx(), self)?;
let rhs = context.pop_to_reg(self, None)?;
context.free_reg(rax);
let lhs = context.pop_to_reg(self, Some(rax))?;
self.asm.mul_wide(
writable!(rax),
writable!(rdx),
lhs.reg,
rhs.reg,
kind,
OperandSize::S64,
);
context.free_reg(rhs);
context.stack.push(lhs.into());
context.stack.push(Val::Reg(TypedReg::i64(rdx)));
Ok(())
}
fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()> {
let (src, dst) = match size {
SplatKind::F32x4 | SplatKind::F64x2 => {
let reg = context.pop_to_reg(self, None)?.reg;
(RegImm::reg(reg), writable!(reg))
}
SplatKind::I8x16 | SplatKind::I16x8 | SplatKind::I32x4 | SplatKind::I64x2 => {
let dst = writable!(context.any_fpr(self)?);
let src = if size == SplatKind::I64x2 {
context.pop_i64_const().map(RegImm::i64)
} else {
context.pop_i32_const().map(RegImm::i32)
}
.map_or_else(
|| -> Result<RegImm> {
let reg = context.pop_to_reg(self, None)?.reg;
self.reinterpret_int_as_float(
dst,
reg,
match size {
SplatKind::I8x16 | SplatKind::I16x8 | SplatKind::I32x4 => {
OperandSize::S32
}
SplatKind::I64x2 => OperandSize::S64,
SplatKind::F32x4 | SplatKind::F64x2 => unreachable!(),
},
)?;
context.free_reg(reg);
Ok(RegImm::Reg(dst.to_reg()))
},
Ok,
)?;
(src, dst)
}
};
if size == SplatKind::I64x2 || size == SplatKind::F64x2 {
self.ensure_has_avx()?;
let mask = Self::vpshuf_mask_for_64_bit_splats();
match src {
RegImm::Reg(src) => self.asm.xmm_vpshuf_rr(src, dst, mask, OperandSize::S32),
RegImm::Imm(imm) => {
let src = self.asm.add_constant(&imm.to_bytes());
self.asm
.xmm_vpshuf_mr(&src, dst, mask, OperandSize::S32, MemFlags::trusted());
}
}
} else {
self.ensure_has_avx2()?;
match src {
RegImm::Reg(src) => self.asm.xmm_vpbroadcast_rr(src, dst, size.lane_size()),
RegImm::Imm(imm) => {
let src = self.asm.add_constant(&imm.to_bytes());
self.asm
.xmm_vpbroadcast_mr(&src, dst, size.lane_size(), MemFlags::trusted());
}
}
}
context
.stack
.push(Val::reg(dst.to_reg(), WasmValType::V128));
Ok(())
}
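    // Build two `pshufb` masks: lane indices < 16 select from `lhs`, indices
    // >= 16 select from `rhs` (rebased to 0..16). Entries of 0x80 force a
    // zero lane, and the two shuffled halves are OR'd into the result.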
fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()> {
self.ensure_has_avx()?;
let mut mask_lhs: [u8; 16] = [0x80; 16];
let mut mask_rhs: [u8; 16] = [0x80; 16];
for i in 0..lanes.len() {
if lanes[i] < 16 {
mask_lhs[i] = lanes[i];
} else {
mask_rhs[i] = lanes[i] - 16;
}
}
let mask_lhs = self.asm.add_constant(&mask_lhs);
let mask_rhs = self.asm.add_constant(&mask_rhs);
self.asm.xmm_vpshufb_rrm(dst, lhs, &mask_lhs);
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vpshufb_rrm(scratch, rhs, &mask_rhs);
self.asm.vpor(dst, dst.to_reg(), scratch.to_reg());
Ok(())
}
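    // Saturating-add 0x70 to each index: indices <= 15 keep their low nibble
    // intact, while indices >= 16 saturate to >= 0x80, which `vpshufb`
    // interprets as "write zero" — exactly Wasm's out-of-range swizzle
    // semantics.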
fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()> {
self.ensure_has_avx()?;
let clamp = self.asm.add_constant(&[0x70; 16]);
self.asm.xmm_vpaddusb_rrm(writable!(rhs), rhs, &clamp);
self.asm.xmm_vpshufb_rrr(dst, lhs, rhs);
Ok(())
}
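    // Atomic RMW: add/sub/xchg map directly to `lock xadd` / `xchg` (sub by
    // negating the operand first); and/or/xor have no single-instruction
    // form and use a compare-exchange loop that returns the old value in
    // `rax`.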
fn atomic_rmw(
&mut self,
context: &mut CodeGenContext<Emission>,
addr: Self::Address,
size: OperandSize,
op: RmwOp,
flags: MemFlags,
extend: Option<Extend<Zero>>,
) -> Result<()> {
let res = match op {
RmwOp::Add => {
let operand = context.pop_to_reg(self, None)?;
self.asm
.lock_xadd(addr, operand.reg, writable!(operand.reg), size, flags);
operand.reg
}
RmwOp::Sub => {
let operand = context.pop_to_reg(self, None)?;
self.asm.neg(operand.reg, writable!(operand.reg), size);
self.asm
.lock_xadd(addr, operand.reg, writable!(operand.reg), size, flags);
operand.reg
}
RmwOp::Xchg => {
let operand = context.pop_to_reg(self, None)?;
self.asm
.xchg(addr, operand.reg, writable!(operand.reg), size, flags);
operand.reg
}
RmwOp::And | RmwOp::Or | RmwOp::Xor => {
let op = match op {
RmwOp::And => AtomicRmwSeqOp::And,
RmwOp::Or => AtomicRmwSeqOp::Or,
RmwOp::Xor => AtomicRmwSeqOp::Xor,
_ => unreachable!(
"invalid op for atomic_rmw_seq, should be one of `or`, `and` or `xor`"
),
};
let dst = context.reg(regs::rax(), self)?;
let operand = context.pop_to_reg(self, None)?;
self.asm
.atomic_rmw_seq(addr, operand.reg, writable!(dst), size, flags, op);
context.free_reg(operand.reg);
dst
}
};
let dst_ty = match extend {
Some(ext) => {
if !(ext.from_bits() == 32 && ext.to_bits() == 64) {
self.asm.movzx_rr(res, writable!(res), ext);
}
WasmValType::int_from_bits(ext.to_bits())
}
None => WasmValType::int_from_bits(size.num_bits()),
};
context.stack.push(TypedReg::new(dst_ty, res).into());
Ok(())
}
fn extract_lane(
&mut self,
src: Reg,
dst: WritableReg,
lane: u8,
kind: ExtractLaneKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
ExtractLaneKind::I8x16S
| ExtractLaneKind::I8x16U
| ExtractLaneKind::I16x8S
| ExtractLaneKind::I16x8U
| ExtractLaneKind::I32x4
| ExtractLaneKind::I64x2 => self.asm.xmm_vpextr_rr(dst, src, lane, kind.lane_size()),
ExtractLaneKind::F32x4 | ExtractLaneKind::F64x2 if lane == 0 => {
assert!(src == dst.to_reg());
}
ExtractLaneKind::F32x4 => self.asm.xmm_vpshuf_rr(src, dst, lane, kind.lane_size()),
ExtractLaneKind::F64x2 => {
assert!(lane == 1);
self.asm
.xmm_vpshuf_rr(src, dst, 0b11_10_11_10, OperandSize::S32)
}
}
match kind {
ExtractLaneKind::I8x16S | ExtractLaneKind::I16x8S => {
self.asm.movsx_rr(dst.to_reg(), dst, kind.into())
}
_ => (),
}
Ok(())
}
fn replace_lane(
&mut self,
src: RegImm,
dst: WritableReg,
lane: u8,
kind: ReplaceLaneKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
ReplaceLaneKind::I8x16
| ReplaceLaneKind::I16x8
| ReplaceLaneKind::I32x4
| ReplaceLaneKind::I64x2 => match src {
RegImm::Reg(reg) => {
self.asm
.xmm_vpinsr_rrr(dst, dst.to_reg(), reg, lane, kind.lane_size());
}
RegImm::Imm(imm) => {
let address = self.asm.add_constant(&imm.to_bytes());
self.asm
.xmm_vpinsr_rrm(dst, dst.to_reg(), &address, lane, kind.lane_size());
}
},
ReplaceLaneKind::F32x4 => {
let imm = lane << 4;
match src {
RegImm::Reg(reg) => self.asm.xmm_vinsertps_rrr(dst, dst.to_reg(), reg, imm),
RegImm::Imm(val) => {
let address = self.asm.add_constant(&val.to_bytes());
self.asm.xmm_vinsertps_rrm(dst, dst.to_reg(), &address, imm);
}
}
}
ReplaceLaneKind::F64x2 => match src {
RegImm::Reg(reg) => match lane {
0 => self.asm.xmm_vmovsd_rrr(dst, dst.to_reg(), reg),
1 => self.asm.xmm_vmovlhps_rrr(dst, dst.to_reg(), reg),
_ => unreachable!(),
},
RegImm::Imm(imm) => {
let address = self.asm.add_constant(&imm.to_bytes());
match lane {
0 => {
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vmovsd_rm(scratch, &address);
self.asm.xmm_vmovsd_rrr(dst, dst.to_reg(), scratch.to_reg());
}
1 => self.asm.xmm_vmovlhps_rrm(dst, dst.to_reg(), &address),
_ => unreachable!(),
}
}
},
}
Ok(())
}
fn atomic_cas(
&mut self,
context: &mut CodeGenContext<Emission>,
addr: Self::Address,
size: OperandSize,
flags: MemFlags,
extend: Option<Extend<Zero>>,
) -> Result<()> {
let rax = context.reg(regs::rax(), self)?;
let replacement = context.pop_to_reg(self, None)?;
context.free_reg(rax);
let expected = context.pop_to_reg(self, Some(regs::rax()))?;
self.asm.cmpxchg(
addr,
expected.reg,
replacement.reg,
writable!(expected.reg),
size,
flags,
);
if let Some(extend) = extend {
if !(extend.from_bits() == 32 && extend.to_bits() == 64) {
self.asm
.movzx_rr(expected.reg, writable!(expected.reg), extend);
}
}
context.stack.push(expected.into());
context.free_reg(replacement);
Ok(())
}
fn v128_eq(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorEqualityKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorEqualityKind::I8x16
| VectorEqualityKind::I16x8
| VectorEqualityKind::I32x4
| VectorEqualityKind::I64x2 => {
self.asm.xmm_vpcmpeq_rrr(dst, lhs, rhs, kind.lane_size())
}
VectorEqualityKind::F32x4 | VectorEqualityKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, lhs, rhs, kind.lane_size(), VcmpKind::Eq)
}
}
Ok(())
}
fn v128_ne(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorEqualityKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorEqualityKind::I8x16
| VectorEqualityKind::I16x8
| VectorEqualityKind::I32x4
| VectorEqualityKind::I64x2 => {
self.asm
.xmm_vpcmpeq_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(rhs), rhs, rhs, kind.lane_size());
self.asm.xmm_vex_rr(AvxOpcode::Vpxor, lhs, rhs, dst);
}
VectorEqualityKind::F32x4 | VectorEqualityKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, lhs, rhs, kind.lane_size(), VcmpKind::Ne)
}
}
Ok(())
}
fn v128_lt(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorCompareKind::I8x16S
| VectorCompareKind::I16x8S
| VectorCompareKind::I32x4S
| VectorCompareKind::I64x2S => {
self.asm.xmm_vpcmpgt_rrr(dst, rhs, lhs, kind.lane_size())
}
VectorCompareKind::I8x16U | VectorCompareKind::I16x8U | VectorCompareKind::I32x4U => {
self.asm
.xmm_vpminu_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(rhs), rhs, rhs, kind.lane_size());
self.asm.xmm_vex_rr(AvxOpcode::Vpxor, lhs, rhs, dst);
}
VectorCompareKind::F32x4 | VectorCompareKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, lhs, rhs, kind.lane_size(), VcmpKind::Lt)
}
}
Ok(())
}
fn v128_le(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorCompareKind::I8x16S | VectorCompareKind::I16x8S | VectorCompareKind::I32x4S => {
self.asm
.xmm_vpmins_rrr(writable!(rhs), lhs, rhs, kind.lane_size());
self.asm.xmm_vpcmpeq_rrr(dst, lhs, rhs, kind.lane_size());
}
VectorCompareKind::I64x2S => {
self.asm
.xmm_vpcmpgt_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(rhs), rhs, rhs, kind.lane_size());
self.asm.xmm_vex_rr(AvxOpcode::Vpxor, lhs, rhs, dst);
}
VectorCompareKind::I8x16U | VectorCompareKind::I16x8U | VectorCompareKind::I32x4U => {
self.asm
.xmm_vpminu_rrr(writable!(rhs), lhs, rhs, kind.lane_size());
self.asm.xmm_vpcmpeq_rrr(dst, lhs, rhs, kind.lane_size());
}
VectorCompareKind::F32x4 | VectorCompareKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, lhs, rhs, kind.lane_size(), VcmpKind::Le)
}
}
Ok(())
}
fn v128_gt(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorCompareKind::I8x16S
| VectorCompareKind::I16x8S
| VectorCompareKind::I32x4S
| VectorCompareKind::I64x2S => {
self.asm.xmm_vpcmpgt_rrr(dst, lhs, rhs, kind.lane_size())
}
VectorCompareKind::I8x16U | VectorCompareKind::I16x8U | VectorCompareKind::I32x4U => {
self.asm
.xmm_vpmaxu_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(lhs), lhs, rhs, kind.lane_size());
self.asm
.xmm_vpcmpeq_rrr(writable!(rhs), rhs, rhs, kind.lane_size());
self.asm.xmm_vex_rr(AvxOpcode::Vpxor, lhs, rhs, dst);
}
VectorCompareKind::F32x4 | VectorCompareKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, rhs, lhs, kind.lane_size(), VcmpKind::Lt)
}
}
Ok(())
}
fn v128_ge(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
VectorCompareKind::I8x16S | VectorCompareKind::I16x8S | VectorCompareKind::I32x4S => {
self.asm
.xmm_vpmaxs_rrr(writable!(rhs), lhs, rhs, kind.lane_size());
self.asm.xmm_vpcmpeq_rrr(dst, lhs, rhs, kind.lane_size());
}
VectorCompareKind::I64x2S => {
self.asm
.xmm_vpcmpgt_rrr(writable!(rhs), rhs, lhs, kind.lane_size());
self.asm.xmm_vpcmpeq_rrr(dst, lhs, lhs, kind.lane_size());
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, dst.to_reg(), rhs, dst);
}
VectorCompareKind::I8x16U | VectorCompareKind::I16x8U | VectorCompareKind::I32x4U => {
self.asm
.xmm_vpmaxu_rrr(writable!(rhs), lhs, rhs, kind.lane_size());
self.asm.xmm_vpcmpeq_rrr(dst, lhs, rhs, kind.lane_size());
}
VectorCompareKind::F32x4 | VectorCompareKind::F64x2 => {
self.asm
.xmm_vcmpp_rrr(dst, rhs, lhs, kind.lane_size(), VcmpKind::Le)
}
}
Ok(())
}
fn fence(&mut self) -> Result<()> {
self.asm.fence(FenceKind::MFence);
Ok(())
}
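    // There is no vector NOT instruction: materialize all-ones with
    // `vpcmpeqd tmp, tmp, tmp` and XOR it into `dst`.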
fn v128_not(&mut self, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
let tmp = regs::scratch_xmm();
self.asm
.xmm_vex_rr(AvxOpcode::Vpcmpeqd, tmp, tmp, writable!(tmp));
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, tmp, dst.to_reg(), dst);
Ok(())
}
fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vex_rr(AvxOpcode::Vpand, src1, src2, dst);
Ok(())
}
fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vex_rr(AvxOpcode::Vpandn, src1, src2, dst);
Ok(())
}
fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vex_rr(AvxOpcode::Vpor, src1, src2, dst);
Ok(())
}
fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vex_rr(AvxOpcode::Vpxor, src1, src2, dst);
Ok(())
}
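    // bitselect(v1, v2, mask) = (v1 & mask) | (v2 & !mask), computed with
    // `vpand` + `vpandn` + `vpor`.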
fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
let tmp = regs::scratch_xmm();
self.v128_and(src1, mask, writable!(tmp))?;
self.v128_and_not(mask, src2, dst)?;
self.v128_or(dst.to_reg(), tmp, dst)?;
Ok(())
}
fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vptest(src, src);
self.asm.setcc(IntCmpKind::Ne, dst);
Ok(())
}
fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128ConvertKind::I32x4S => self.asm.xmm_vcvt_rr(src, dst, VcvtKind::I32ToF32),
V128ConvertKind::I32x4LowS => self.asm.xmm_vcvt_rr(src, dst, VcvtKind::I32ToF64),
V128ConvertKind::I32x4U => {
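                // No unsigned i32 -> f32 conversion exists: split each lane
                // into its low and high 16-bit halves and convert both with
                // the signed conversion. The high half is halved before
                // converting and doubled afterwards so it stays in signed
                // range, then the two converted halves are summed.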
let scratch = writable!(regs::scratch_xmm());
self.asm
.xmm_vpsll_rr(src, scratch, 0x10, kind.src_lane_size());
self.asm
.xmm_vpsrl_rr(scratch.to_reg(), scratch, 0x10, kind.src_lane_size());
self.asm
.xmm_vpsub_rrr(src, scratch.to_reg(), dst, kind.src_lane_size());
self.asm
.xmm_vcvt_rr(scratch.to_reg(), scratch, VcvtKind::I32ToF32);
self.asm
.xmm_vpsrl_rr(dst.to_reg(), dst, 1, kind.src_lane_size());
self.asm.xmm_vcvt_rr(dst.to_reg(), dst, VcvtKind::I32ToF32);
self.asm
.xmm_vaddp_rrr(dst.to_reg(), dst.to_reg(), dst, kind.src_lane_size());
self.asm
.xmm_vaddp_rrr(dst.to_reg(), scratch.to_reg(), dst, kind.src_lane_size());
}
V128ConvertKind::I32x4LowU => {
let conversion_constant = self
.asm
.add_constant(&[0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43]);
self.asm
.xmm_vunpcklp_rrm(src, &conversion_constant, dst, kind.src_lane_size());
let conversion_constant = self.asm.add_constant(&[
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x30, 0x43,
]);
self.asm.xmm_vsub_rrm(
dst.to_reg(),
&conversion_constant,
dst,
kind.dst_lane_size(),
);
}
}
Ok(())
}
fn v128_narrow(
&mut self,
src1: Reg,
src2: Reg,
dst: WritableReg,
kind: V128NarrowKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128NarrowKind::I16x8S | V128NarrowKind::I32x4S => {
self.asm
.xmm_vpackss_rrr(src1, src2, dst, kind.dst_lane_size())
}
V128NarrowKind::I16x8U | V128NarrowKind::I32x4U => {
self.asm
.xmm_vpackus_rrr(src1, src2, dst, kind.dst_lane_size())
}
}
Ok(())
}
fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vcvt_rr(src, dst, VcvtKind::F64ToF32);
Ok(())
}
fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vcvt_rr(src, dst, VcvtKind::F32ToF64);
Ok(())
}
fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128ExtendKind::LowI8x16S
| V128ExtendKind::LowI8x16U
| V128ExtendKind::LowI16x8S
| V128ExtendKind::LowI16x8U
| V128ExtendKind::LowI32x4S
| V128ExtendKind::LowI32x4U => self.asm.xmm_vpmov_rr(src, dst, kind.into()),
V128ExtendKind::HighI8x16S | V128ExtendKind::HighI16x8S => {
self.asm.xmm_vpalignr_rrr(src, src, dst, 0x8);
self.asm.xmm_vpmov_rr(dst.to_reg(), dst, kind.into());
}
V128ExtendKind::HighI8x16U | V128ExtendKind::HighI16x8U => {
let scratch = regs::scratch_xmm();
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, scratch, scratch, writable!(scratch));
self.asm
.xmm_vpunpckh_rrr(src, scratch, dst, kind.src_lane_size());
}
V128ExtendKind::HighI32x4S => {
self.asm
.xmm_vpshuf_rr(src, dst, 0b11_10_11_10, kind.src_lane_size());
self.asm.xmm_vpmov_rr(dst.to_reg(), dst, kind.into());
}
V128ExtendKind::HighI32x4U => {
let scratch = regs::scratch_xmm();
self.asm
.xmm_vxorp_rrr(scratch, scratch, writable!(scratch), kind.src_lane_size());
self.asm
.xmm_vunpckhp_rrr(src, scratch, dst, kind.src_lane_size());
}
}
Ok(())
}
fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()> {
self.ensure_has_avx()?;
let op = match kind {
V128AddKind::F32x4 => AvxOpcode::Vaddps,
V128AddKind::F64x2 => AvxOpcode::Vaddpd,
V128AddKind::I8x16 => AvxOpcode::Vpaddb,
V128AddKind::I8x16SatS => AvxOpcode::Vpaddsb,
V128AddKind::I8x16SatU => AvxOpcode::Vpaddusb,
V128AddKind::I16x8 => AvxOpcode::Vpaddw,
V128AddKind::I16x8SatS => AvxOpcode::Vpaddsw,
V128AddKind::I16x8SatU => AvxOpcode::Vpaddusw,
V128AddKind::I32x4 => AvxOpcode::Vpaddd,
V128AddKind::I64x2 => AvxOpcode::Vpaddq,
};
self.asm.xmm_vex_rr(op, lhs, rhs, dst);
Ok(())
}
fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()> {
self.ensure_has_avx()?;
let op = match kind {
V128SubKind::F32x4 => AvxOpcode::Vsubps,
V128SubKind::F64x2 => AvxOpcode::Vsubpd,
V128SubKind::I8x16 => AvxOpcode::Vpsubb,
V128SubKind::I8x16SatS => AvxOpcode::Vpsubsb,
V128SubKind::I8x16SatU => AvxOpcode::Vpsubusb,
V128SubKind::I16x8 => AvxOpcode::Vpsubw,
V128SubKind::I16x8SatS => AvxOpcode::Vpsubsw,
V128SubKind::I16x8SatU => AvxOpcode::Vpsubusw,
V128SubKind::I32x4 => AvxOpcode::Vpsubd,
V128SubKind::I64x2 => AvxOpcode::Vpsubq,
};
self.asm.xmm_vex_rr(op, lhs, rhs, dst);
Ok(())
}
fn v128_mul(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: V128MulKind,
) -> Result<()> {
self.ensure_has_avx()?;
let rhs = context.pop_to_reg(self, None)?;
let lhs = context.pop_to_reg(self, None)?;
let mul_avx = |this: &mut Self, op| {
this.asm
.xmm_vex_rr(op, lhs.reg, rhs.reg, writable!(lhs.reg));
};
let mul_i64x2_avx512 = |this: &mut Self| {
this.asm
.xmm_rm_rvex3(Avx512Opcode::Vpmullq, lhs.reg, rhs.reg, writable!(lhs.reg));
};
let mul_i64x2_fallback =
|this: &mut Self, context: &mut CodeGenContext<Emission>| -> Result<()> {
let tmp1 = regs::scratch_xmm();
let tmp2 = context.any_fpr(this)?;
this.asm
.xmm_vex_ri(AvxOpcode::Vpsrlq, lhs.reg, 32, writable!(tmp1));
this.asm
.xmm_vex_rr(AvxOpcode::Vpmuldq, tmp1, rhs.reg, writable!(tmp2));
this.asm
.xmm_vex_ri(AvxOpcode::Vpsrlq, rhs.reg, 32, writable!(tmp1));
this.asm
.xmm_vex_rr(AvxOpcode::Vpmuludq, tmp1, lhs.reg, writable!(tmp1));
this.asm
.xmm_vex_rr(AvxOpcode::Vpaddq, tmp1, tmp2, writable!(tmp1));
this.asm
.xmm_vex_ri(AvxOpcode::Vpsllq, tmp1, 32, writable!(tmp1));
this.asm
.xmm_vex_rr(AvxOpcode::Vpmuludq, lhs.reg, rhs.reg, writable!(tmp2));
this.asm
.xmm_vex_rr(AvxOpcode::Vpaddq, tmp1, tmp2, writable!(lhs.reg));
context.free_reg(tmp2);
Ok(())
};
match kind {
V128MulKind::F32x4 => mul_avx(self, AvxOpcode::Vmulps),
V128MulKind::F64x2 => mul_avx(self, AvxOpcode::Vmulpd),
V128MulKind::I16x8 => mul_avx(self, AvxOpcode::Vpmullw),
V128MulKind::I32x4 => mul_avx(self, AvxOpcode::Vpmulld),
V128MulKind::I64x2
if self.ensure_has_avx512vl().is_ok() && self.ensure_has_avx512dq().is_ok() =>
{
mul_i64x2_avx512(self)
}
V128MulKind::I64x2 => mul_i64x2_fallback(self, context)?,
}
context.stack.push(lhs.into());
context.free_reg(rhs);
Ok(())
}
fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128AbsKind::I8x16 | V128AbsKind::I16x8 | V128AbsKind::I32x4 => {
self.asm.xmm_vpabs_rr(src, dst, kind.lane_size())
}
V128AbsKind::I64x2 => {
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vpsra_rri(src, scratch, 0x1f, OperandSize::S32);
self.asm
.xmm_vpshuf_rr(scratch.to_reg(), scratch, 0b11_11_01_01, OperandSize::S32);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, src, scratch.to_reg(), dst);
self.asm
.xmm_vpsub_rrr(dst.to_reg(), scratch.to_reg(), dst, kind.lane_size());
}
V128AbsKind::F32x4 | V128AbsKind::F64x2 => {
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vpcmpeq_rrr(
scratch,
scratch.to_reg(),
scratch.to_reg(),
kind.lane_size(),
);
self.asm
.xmm_vpsrl_rr(scratch.to_reg(), scratch, 0x1, kind.lane_size());
self.asm
.xmm_vandp_rrr(src, scratch.to_reg(), dst, kind.lane_size());
}
}
Ok(())
}
fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()> {
self.ensure_has_avx()?;
let tmp = regs::scratch_xmm();
match kind {
V128NegKind::I8x16 | V128NegKind::I16x8 | V128NegKind::I32x4 | V128NegKind::I64x2 => {
self.v128_xor(tmp, tmp, writable!(tmp))?;
self.v128_sub(tmp, op.to_reg(), op, kind.into())?;
}
V128NegKind::F32x4 | V128NegKind::F64x2 => {
self.asm
.xmm_vpcmpeq_rrr(writable!(tmp), tmp, tmp, kind.lane_size());
self.asm.xmm_vpsll_rr(
tmp,
writable!(tmp),
(kind.lane_size().num_bits() - 1) as u32,
kind.lane_size(),
);
self.asm
.xmm_vxorp_rrr(op.to_reg(), tmp, op, kind.lane_size());
}
}
Ok(())
}
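    // Shift amounts are masked to the lane width, per Wasm semantics. 16/32/
    // 64-bit lanes map directly to AVX shifts. i8x16 shifts run on 16-bit
    // lanes and clear the spilled bits via the `I8X16_*_MASKS` tables;
    // `i8x16.shr_s` widens to 16-bit lanes and re-narrows, and `i64x2.shr_s`
    // (no `vpsraq` outside AVX-512) uses a sign-mask xor/subtract sequence.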
fn v128_shift(
&mut self,
context: &mut CodeGenContext<Emission>,
lane_width: OperandSize,
kind: ShiftKind,
) -> Result<()> {
self.ensure_has_avx()?;
let shift_amount = context.pop_to_reg(self, None)?.reg;
let operand = context.pop_to_reg(self, None)?.reg;
let tmp_xmm = regs::scratch_xmm();
let tmp = regs::scratch();
let amount_mask = lane_width.num_bits() - 1;
self.and(
writable!(shift_amount),
shift_amount,
RegImm::i32(amount_mask as i32),
OperandSize::S32,
)?;
let shl_normal = |this: &mut Self, op: AvxOpcode| {
this.asm
.avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32);
this.asm
.xmm_vex_rr(op, operand, tmp_xmm, writable!(operand));
};
let shift_i8x16 = |this: &mut Self, masks: &'static [u8], op: AvxOpcode| {
this.asm
.avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32);
this.asm
.xmm_vex_rr(op, operand, tmp_xmm, writable!(operand));
let masks_addr = this.asm.add_constant(masks);
this.asm.lea(&masks_addr, writable!(tmp), OperandSize::S64);
this.asm
.shift_ir(4, writable!(shift_amount), ShiftKind::Shl, OperandSize::S32);
this.asm.xmm_vmovdqu_mr(
&Address::ImmRegRegShift {
simm32: 0,
base: tmp,
index: shift_amount,
shift: 0,
},
writable!(tmp_xmm),
MemFlags::trusted(),
);
this.asm
.xmm_vex_rr(AvxOpcode::Vpand, tmp_xmm, operand, writable!(operand));
};
let i64x2_shr_s = |this: &mut Self, context: &mut CodeGenContext<Emission>| -> Result<()> {
const SIGN_MASK: u128 = 0x8000000000000000_8000000000000000;
let tmp_xmm2 = context.any_fpr(this)?;
this.asm
.avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32);
let cst = this.asm.add_constant(&SIGN_MASK.to_le_bytes());
this.asm
.xmm_vmovdqu_mr(&cst, writable!(tmp_xmm2), MemFlags::trusted());
this.asm
.xmm_vex_rr(AvxOpcode::Vpsrlq, tmp_xmm2, tmp_xmm, writable!(tmp_xmm2));
this.asm
.xmm_vex_rr(AvxOpcode::Vpsrlq, operand, tmp_xmm, writable!(operand));
this.asm
.xmm_vex_rr(AvxOpcode::Vpxor, operand, tmp_xmm2, writable!(operand));
this.asm
.xmm_vex_rr(AvxOpcode::Vpsubq, operand, tmp_xmm2, writable!(operand));
context.free_reg(tmp_xmm2);
Ok(())
};
let i8x16_shr_s = |this: &mut Self, context: &mut CodeGenContext<Emission>| -> Result<()> {
this.asm
.add_ir(8, writable!(shift_amount), OperandSize::S32);
this.asm
.avx_gpr_to_xmm(shift_amount, writable!(tmp_xmm), OperandSize::S32);
let tmp_lo = context.any_fpr(this)?;
let tmp_hi = context.any_fpr(this)?;
this.asm
.xmm_vex_rr(AvxOpcode::Vpunpcklbw, operand, operand, writable!(tmp_lo));
this.asm
.xmm_vex_rr(AvxOpcode::Vpunpckhbw, operand, operand, writable!(tmp_hi));
this.asm
.xmm_vex_rr(AvxOpcode::Vpsraw, tmp_lo, tmp_xmm, writable!(tmp_lo));
this.asm
.xmm_vex_rr(AvxOpcode::Vpsraw, tmp_hi, tmp_xmm, writable!(tmp_hi));
this.asm
.xmm_vex_rr(AvxOpcode::Vpacksswb, tmp_lo, tmp_hi, writable!(operand));
context.free_reg(tmp_lo);
context.free_reg(tmp_hi);
Ok(())
};
match (lane_width, kind) {
(OperandSize::S8, ShiftKind::Shl) => {
shift_i8x16(self, &I8X16_ISHL_MASKS, AvxOpcode::Vpsllw)
}
(OperandSize::S16, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpsllw),
(OperandSize::S32, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpslld),
(OperandSize::S64, ShiftKind::Shl) => shl_normal(self, AvxOpcode::Vpsllq),
(OperandSize::S8, ShiftKind::ShrU) => {
shift_i8x16(self, &I8X16_USHR_MASKS, AvxOpcode::Vpsrlw)
}
(OperandSize::S16, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrlw),
(OperandSize::S32, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrld),
(OperandSize::S64, ShiftKind::ShrU) => shl_normal(self, AvxOpcode::Vpsrlq),
(OperandSize::S8, ShiftKind::ShrS) => i8x16_shr_s(self, context)?,
(OperandSize::S16, ShiftKind::ShrS) => shl_normal(self, AvxOpcode::Vpsraw),
(OperandSize::S32, ShiftKind::ShrS) => shl_normal(self, AvxOpcode::Vpsrad),
(OperandSize::S64, ShiftKind::ShrS) => i64x2_shr_s(self, context)?,
_ => bail!(CodeGenError::invalid_operand_combination()),
}
context.free_reg(shift_amount);
context
.stack
.push(TypedReg::new(WasmValType::V128, operand).into());
Ok(())
}
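    // `vpmulhrsw` implements the Q15 rounding multiply, but its one overflow
    // case (INT16_MIN * INT16_MIN) produces 0x8000 instead of the saturated
    // 0x7fff: compare against the 0x8000 pattern and XOR to flip exactly
    // those lanes to 0x7fff.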
fn v128_q15mulr_sat_s(
&mut self,
lhs: Reg,
rhs: Reg,
dst: WritableReg,
size: OperandSize,
) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vpmulhrs_rrr(lhs, rhs, dst, size);
let address = self.asm.add_constant(&[
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
0x00, 0x80,
]);
self.asm
.xmm_vpcmpeq_rrm(writable!(rhs), dst.to_reg(), &address, size);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, dst.to_reg(), rhs, dst);
Ok(())
}
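    // all_true: compare each lane against zero, then `vptest` sets ZF iff no
    // lane compared equal to zero, so `sete` yields the result.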
fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
let scratch = regs::scratch_xmm();
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, scratch, scratch, writable!(scratch));
self.asm.xmm_vpcmpeq_rrr(writable!(src), src, scratch, size);
self.asm.xmm_vptest(src, src);
self.asm.setcc(IntCmpKind::Eq, dst);
Ok(())
}
fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
match size {
OperandSize::S8 => self.asm.xmm_vpmovmsk_rr(src, dst, size, OperandSize::S32),
OperandSize::S16 => {
self.asm
.xmm_vpackss_rrr(src, src, writable!(src), OperandSize::S8);
self.asm
.xmm_vpmovmsk_rr(src, dst, OperandSize::S8, OperandSize::S32);
self.asm
.shift_ir(0x8, dst, ShiftKind::ShrU, OperandSize::S32);
}
OperandSize::S32 | OperandSize::S64 => self.asm.xmm_vmovskp_rr(src, dst, size, size),
_ => unimplemented!(),
}
Ok(())
}
fn v128_trunc(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: V128TruncKind,
) -> Result<()> {
self.ensure_has_avx()?;
let reg = writable!(context.pop_to_reg(self, None)?.reg);
match kind {
V128TruncKind::F32x4 | V128TruncKind::F64x2 => self.asm.xmm_vroundp_rri(
reg.to_reg(),
reg,
VroundMode::TowardZero,
kind.dst_lane_size(),
),
V128TruncKind::I32x4FromF32x4S => {
self.v128_trunc_sat_f32x4_s(reg, kind.src_lane_size(), kind.dst_lane_size());
}
V128TruncKind::I32x4FromF32x4U => {
let temp_reg = writable!(context.any_fpr(self)?);
self.v128_trunc_sat_f32x4_u(
reg,
temp_reg,
kind.src_lane_size(),
kind.dst_lane_size(),
);
context.free_reg(temp_reg.to_reg());
}
V128TruncKind::I32x4FromF64x2SZero => {
self.v128_trunc_sat_f64x2_s_zero(reg, kind.src_lane_size());
}
V128TruncKind::I32x4FromF64x2UZero => {
self.v128_trunc_sat_f64x2_u_zero(reg, kind.src_lane_size(), kind.dst_lane_size());
}
}
context.stack.push(TypedReg::v128(reg.to_reg()).into());
Ok(())
}
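    /// Emits `min`: integer shapes map to a single AVX `vpmin*` instruction.
    /// Float shapes compute the minimum in both operand orders and OR the
    /// results so that -0.0 is considered less than +0.0, then use an
    /// unordered compare to find NaN lanes and canonicalize them.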
fn v128_min(
&mut self,
src1: Reg,
src2: Reg,
dst: WritableReg,
kind: V128MinKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128MinKind::I8x16S
| V128MinKind::I8x16U
| V128MinKind::I16x8S
| V128MinKind::I16x8U
| V128MinKind::I32x4S
| V128MinKind::I32x4U => {
let op = match kind {
V128MinKind::I8x16S => AvxOpcode::Vpminsb,
V128MinKind::I8x16U => AvxOpcode::Vpminub,
V128MinKind::I16x8S => AvxOpcode::Vpminsw,
V128MinKind::I16x8U => AvxOpcode::Vpminuw,
V128MinKind::I32x4S => AvxOpcode::Vpminsd,
V128MinKind::I32x4U => AvxOpcode::Vpminud,
_ => unreachable!(),
};
self.asm.xmm_vex_rr(op, src1, src2, dst);
}
V128MinKind::F32x4 | V128MinKind::F64x2 => {
let scratch = writable!(regs::scratch_xmm());
self.asm
.xmm_vminp_rrr(src1, src2, scratch, kind.lane_size());
self.asm.xmm_vminp_rrr(src2, src1, dst, kind.lane_size());
self.asm
.xmm_vorp_rrr(dst.to_reg(), scratch.to_reg(), dst, kind.lane_size());
self.asm.xmm_vcmpp_rrr(
writable!(src2),
src2,
dst.to_reg(),
kind.lane_size(),
VcmpKind::Unord,
);
self.asm
.xmm_vorp_rrr(src2, dst.to_reg(), dst, kind.lane_size());
self.canonicalize_nans(writable!(src2), dst, kind.lane_size());
}
}
Ok(())
}
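    /// Emits `max`: integer shapes map to a single AVX `vpmax*` instruction.
    /// Float shapes compute the maximum in both operand orders, then use the
    /// XOR/OR/SUB sequence so that +0.0 is considered greater than -0.0, and
    /// finally detect NaN lanes with an unordered compare and canonicalize
    /// them.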
fn v128_max(
&mut self,
src1: Reg,
src2: Reg,
dst: WritableReg,
kind: V128MaxKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128MaxKind::I8x16S
| V128MaxKind::I8x16U
| V128MaxKind::I16x8S
| V128MaxKind::I16x8U
| V128MaxKind::I32x4S
| V128MaxKind::I32x4U => {
let op = match kind {
V128MaxKind::I8x16S => AvxOpcode::Vpmaxsb,
V128MaxKind::I8x16U => AvxOpcode::Vpmaxub,
V128MaxKind::I16x8S => AvxOpcode::Vpmaxsw,
V128MaxKind::I16x8U => AvxOpcode::Vpmaxuw,
V128MaxKind::I32x4S => AvxOpcode::Vpmaxsd,
V128MaxKind::I32x4U => AvxOpcode::Vpmaxud,
_ => unreachable!(),
};
self.asm.xmm_vex_rr(op, src1, src2, dst);
}
V128MaxKind::F32x4 | V128MaxKind::F64x2 => {
let scratch = writable!(regs::scratch_xmm());
self.asm
.xmm_vmaxp_rrr(src1, src2, scratch, kind.lane_size());
self.asm.xmm_vmaxp_rrr(src2, src1, dst, kind.lane_size());
self.asm
.xmm_vxorp_rrr(dst.to_reg(), scratch.to_reg(), dst, kind.lane_size());
self.asm.xmm_vorp_rrr(
dst.to_reg(),
scratch.to_reg(),
writable!(src2),
kind.lane_size(),
);
self.asm
.xmm_vsub_rrr(src2, dst.to_reg(), dst, kind.lane_size());
self.asm.xmm_vcmpp_rrr(
writable!(src2),
src2,
src2,
kind.lane_size(),
VcmpKind::Unord,
);
self.canonicalize_nans(writable!(src2), dst, kind.lane_size());
}
}
Ok(())
}
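    /// Emits `extmul` by reusing existing building blocks: both operands are
    /// popped, widened in place with `v128_extend`, pushed back onto the
    /// value stack, and multiplied by `v128_mul`.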
fn v128_extmul(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: V128ExtMulKind,
) -> Result<()> {
self.ensure_has_avx()?;
let src1 = context.pop_to_reg(self, None)?;
let src2 = context.pop_to_reg(self, None)?;
let ext_kind = kind.into();
self.v128_extend(src1.reg, writable!(src1.reg), ext_kind)?;
self.v128_extend(src2.reg, writable!(src2.reg), ext_kind)?;
context.stack.push(src2.into());
context.stack.push(src1.into());
self.v128_mul(context, kind.into())
}
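    /// Emits `extadd_pairwise` via the multiply-add instructions with a
    /// vector of ones: `vpmaddubsw` (i8 lanes) and `vpmaddwd` (i16 lanes) add
    /// adjacent products, so multiplying by 1 yields the pairwise sums. The
    /// unsigned i16x8 case first flips the sign bit of each word (biasing by
    /// -0x8000), does the signed multiply-add, and then adds 0x10000 to each
    /// i32 lane to undo the bias.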
fn v128_extadd_pairwise(
&mut self,
src: Reg,
dst: WritableReg,
kind: V128ExtAddKind,
) -> Result<()> {
self.ensure_has_avx()?;
match kind {
V128ExtAddKind::I8x16S => {
let scratch = regs::scratch_xmm();
let mask = self.asm.add_constant(&[1; 16]);
self.asm.xmm_mov_mr(
&mask,
writable!(scratch),
OperandSize::S128,
MemFlags::trusted(),
);
self.asm
.xmm_vex_rr(AvxOpcode::Vpmaddubsw, scratch, src, dst);
}
V128ExtAddKind::I8x16U => {
let mask = self.asm.add_constant(&[1; 16]);
self.asm
.xmm_vpmaddubs_rmr(src, &mask, dst, OperandSize::S16);
}
V128ExtAddKind::I16x8S => {
let mask = self
.asm
.add_constant(&[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]);
self.asm.xmm_vpmaddwd_rmr(src, &mask, dst);
}
V128ExtAddKind::I16x8U => {
let xor_mask = self.asm.add_constant(&[
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
0x80, 0x00, 0x80,
]);
self.asm.xmm_vpxor_rmr(src, &xor_mask, dst);
let madd_mask = self
.asm
.add_constant(&[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]);
self.asm.xmm_vpmaddwd_rmr(dst.to_reg(), &madd_mask, dst);
let add_mask = self
.asm
.add_constant(&[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0]);
self.asm
.xmm_vpadd_rmr(dst.to_reg(), &add_mask, dst, OperandSize::S32);
}
}
Ok(())
}
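    /// Emits `i32x4.dot_i16x8_s`, which is exactly `vpmaddwd`.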
fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vex_rr(AvxOpcode::Vpmaddwd, lhs, rhs, dst);
Ok(())
}
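    /// Emits `i8x16.popcnt` with the classic nibble-lookup approach: each
    /// byte is split into its low and high nibble, `vpshufb` looks up the
    /// per-nibble bit counts in the 16-entry table below, and the two halves
    /// are added.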
fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()> {
self.ensure_has_avx()?;
let reg = writable!(context.pop_to_reg(self, None)?.reg);
let scratch = writable!(regs::scratch_xmm());
let address = self.asm.add_constant(&[
0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
0x0F, 0x0F,
]);
self.asm.xmm_vpand_rrm(reg.to_reg(), &address, scratch);
self.asm
.xmm_vpsrl_rr(reg.to_reg(), reg, 0x4, OperandSize::S16);
self.asm.xmm_vpand_rrm(reg.to_reg(), &address, reg);
let address = self.asm.add_constant(&[
0x0, 0x1, 0x1, 0x2, 0x1, 0x2, 0x2, 0x3, 0x1, 0x2, 0x2, 0x3, 0x2, 0x3, 0x3, 0x4,
]);
let reg2 = writable!(context.any_fpr(self)?);
self.asm
.xmm_mov_mr(&address, reg2, OperandSize::S128, MemFlags::trusted());
self.asm.xmm_vpshufb_rrr(reg, reg2.to_reg(), reg.to_reg());
self.asm
.xmm_vpshufb_rrr(scratch, reg2.to_reg(), scratch.to_reg());
context.free_reg(reg2.to_reg());
self.asm
.xmm_vpadd_rrr(reg.to_reg(), scratch.to_reg(), reg, OperandSize::S8);
context.stack.push(TypedReg::v128(reg.to_reg()).into());
Ok(())
}
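    /// Emits `avgr` (unsigned rounding average) via `vpavgb`/`vpavgw`.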
fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vpavg_rrr(lhs, rhs, dst, size);
Ok(())
}
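    /// Emits a lane-wise float division via `vdivps`/`vdivpd`.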
fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vdivp_rrr(lhs, rhs, dst, size);
Ok(())
}
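    /// Emits a lane-wise float square root via `vsqrtps`/`vsqrtpd`.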
fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vsqrtp_rr(src, dst, size);
Ok(())
}
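    /// Emits `ceil`: `vroundps`/`vroundpd` rounding toward positive infinity.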
fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm
.xmm_vroundp_rri(src, dst, VroundMode::TowardPositiveInfinity, size);
Ok(())
}
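    /// Emits `floor`: `vroundps`/`vroundpd` rounding toward negative infinity.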
fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm
.xmm_vroundp_rri(src, dst, VroundMode::TowardNegativeInfinity, size);
Ok(())
}
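    /// Emits `nearest`: `vroundps`/`vroundpd` rounding to nearest, ties to
    /// even.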
fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm
.xmm_vroundp_rri(src, dst, VroundMode::TowardNearest, size);
Ok(())
}
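    /// Emits `pmin` (`rhs < lhs ? rhs : lhs`); the operand order passed to
    /// `vminps`/`vminpd` is chosen so the instruction's behavior on ties and
    /// NaNs matches the Wasm pseudo-minimum.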
fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vminp_rrr(rhs, lhs, dst, size);
Ok(())
}
fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()> {
self.ensure_has_avx()?;
self.asm.xmm_vmaxp_rrr(rhs, lhs, dst, size);
Ok(())
}
}
impl MacroAssembler {
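    /// Creates a new x64 `MacroAssembler`, deriving the pointer operand size
    /// from the target's pointer type.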
pub fn new(
ptr_size: impl PtrSize,
shared_flags: settings::Flags,
isa_flags: x64_settings::Flags,
) -> Result<Self> {
let ptr_type: WasmValType = ptr_type_from_ptr_size(ptr_size.size());
Ok(Self {
sp_offset: 0,
sp_max: 0,
stack_max_use_add: None,
asm: Assembler::new(shared_flags.clone(), isa_flags.clone()),
flags: isa_flags,
shared_flags,
ptr_size: ptr_type.try_into()?,
})
}
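    /// Emits a patchable `add` of the frame's maximum stack usage into
    /// `reg`; the immediate is back-patched once the final `sp_max` is
    /// known.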
fn add_stack_max(&mut self, reg: Reg) {
assert!(self.stack_max_use_add.is_none());
let patch = PatchableAddToReg::new(reg, OperandSize::S64, self.asm.buffer_mut());
self.stack_max_use_add.replace(patch);
}
fn ensure_has_avx(&self) -> Result<()> {
anyhow::ensure!(self.flags.has_avx(), CodeGenError::UnimplementedForNoAvx);
Ok(())
}
fn ensure_has_avx2(&self) -> Result<()> {
anyhow::ensure!(self.flags.has_avx2(), CodeGenError::UnimplementedForNoAvx2);
Ok(())
}
fn ensure_has_avx512vl(&self) -> Result<()> {
anyhow::ensure!(
self.flags.has_avx512vl(),
CodeGenError::UnimplementedForNoAvx512VL
);
Ok(())
}
fn ensure_has_avx512dq(&self) -> Result<()> {
anyhow::ensure!(
self.flags.has_avx512dq(),
CodeGenError::UnimplementedForNoAvx512DQ
);
Ok(())
}
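    /// Bumps the virtual stack pointer offset and tracks the high-water mark
    /// recorded in `sp_max`.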
fn increment_sp(&mut self, bytes: u32) {
self.sp_offset += bytes;
self.sp_max = self.sp_max.max(self.sp_offset);
}
fn decrement_sp(&mut self, bytes: u32) {
assert!(
self.sp_offset >= bytes,
"sp offset = {}; bytes = {}",
self.sp_offset,
bytes
);
self.sp_offset -= bytes;
}
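    /// Loads an immediate into `dst`: integers use `mov`, while float and
    /// vector constants are routed through the constant pool.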
    fn load_constant(&mut self, constant: &I, dst: WritableReg, size: OperandSize) -> Result<()> {
        match constant {
            I::I32(v) => Ok(self.asm.mov_ir(*v as u64, dst, size)),
            I::I64(v) => Ok(self.asm.mov_ir(*v, dst, size)),
            // All float and vector constants load from the constant pool.
            I::F32(_) | I::F64(_) | I::V128(_) => {
                Ok(self.asm.load_fp_const(dst, &constant.to_bytes(), size))
            }
        }
    }
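    /// The core load routine: loads into integer registers are zero-extended
    /// to 64 bits, while float and vector destinations use the XMM moves.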
fn load_impl(
&mut self,
src: Address,
dst: WritableReg,
size: OperandSize,
flags: MemFlags,
) -> Result<()> {
if dst.to_reg().is_int() {
let ext = size.extend_to::<Zero>(OperandSize::S64);
self.asm.movzx_mr(&src, dst, ext, flags);
} else {
self.asm.xmm_mov_mr(&src, dst, size, flags);
}
Ok(())
}
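    /// The core store routine. Immediates that fit the `mov` immediate forms
    /// are stored directly; anything wider is materialized through a scratch
    /// register or the constant pool first.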
fn store_impl(
&mut self,
src: RegImm,
dst: Address,
size: OperandSize,
flags: MemFlags,
) -> Result<()> {
        match src {
RegImm::Imm(imm) => match imm {
I::I32(v) => self.asm.mov_im(v as i32, &dst, size, flags),
I::I64(v) => match v.try_into() {
Ok(v) => self.asm.mov_im(v, &dst, size, flags),
Err(_) => {
let scratch = regs::scratch();
self.asm.mov_ir(v, writable!(scratch), size);
self.asm.mov_rm(scratch, &dst, size, flags);
}
},
I::F32(v) => {
let addr = self.asm.add_constant(v.to_le_bytes().as_slice());
let float_scratch = regs::scratch_xmm();
self.asm
.xmm_mov_mr(&addr, writable!(float_scratch), size, MemFlags::trusted());
self.asm.xmm_mov_rm(float_scratch, &dst, size, flags);
}
I::F64(v) => {
let addr = self.asm.add_constant(v.to_le_bytes().as_slice());
let float_scratch = regs::scratch_xmm();
self.asm
.xmm_mov_mr(&addr, writable!(float_scratch), size, MemFlags::trusted());
self.asm.xmm_mov_rm(float_scratch, &dst, size, flags);
}
I::V128(v) => {
let addr = self.asm.add_constant(v.to_le_bytes().as_slice());
let vector_scratch = regs::scratch_xmm();
self.asm.xmm_mov_mr(
&addr,
writable!(vector_scratch),
size,
MemFlags::trusted(),
);
self.asm.xmm_mov_rm(vector_scratch, &dst, size, flags);
}
},
RegImm::Reg(reg) => {
if reg.is_int() {
self.asm.mov_rm(reg, &dst, size, flags);
} else {
self.asm.xmm_mov_rm(reg, &dst, size, flags);
}
}
        }
Ok(())
}
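    /// Checks the two-argument (destructive destination) form required by
    /// most non-AVX instructions, where `dst` must alias `lhs`.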
fn ensure_two_argument_form(dst: &Reg, lhs: &Reg) -> Result<()> {
if dst != lhs {
Err(anyhow!(CodeGenError::invalid_two_arg_form()))
} else {
Ok(())
}
}
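    /// Returns the `vpshufd` immediate that copies doublewords 0 and 1 into
    /// doublewords 2 and 3, i.e. broadcasts the low 64 bits of the source
    /// across the destination.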
fn vpshuf_mask_for_64_bit_splats() -> u8 {
0b01_00_01_00
}
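    /// Emits `i32x4.trunc_sat_f32x4_s`: NaN lanes are zeroed with a
    /// self-equality mask before `vcvttps2dq`; lanes that overflowed to the
    /// 0x80000000 indefinite value from a *positive* input are then flipped
    /// to 0x7FFFFFFF with a sign-derived XOR mask.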
fn v128_trunc_sat_f32x4_s(
&mut self,
reg: WritableReg,
src_lane_size: OperandSize,
dst_lane_size: OperandSize,
) {
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vcmpp_rrr(
scratch,
reg.to_reg(),
reg.to_reg(),
src_lane_size,
VcmpKind::Eq,
);
self.asm
.xmm_vandp_rrr(reg.to_reg(), scratch.to_reg(), reg, src_lane_size);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, scratch.to_reg(), reg.to_reg(), scratch);
self.asm.xmm_vcvt_rr(reg.to_reg(), reg, VcvtKind::F32ToI32);
self.asm
.xmm_vex_rr(AvxOpcode::Vpand, reg.to_reg(), scratch.to_reg(), scratch);
self.asm
.xmm_vpsra_rri(scratch.to_reg(), scratch, 0x1F, dst_lane_size);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, reg.to_reg(), scratch.to_reg(), reg);
}
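    /// Emits `i32x4.trunc_sat_f32x4_u` with the usual split-at-2^31
    /// sequence: negative and NaN inputs are clamped to zero, values below
    /// 2^31 are converted with the signed `vcvttps2dq` (kept in `temp_reg`),
    /// values of 2^31 and above are converted after subtracting 2^31, and
    /// the two halves are combined at the end.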
fn v128_trunc_sat_f32x4_u(
&mut self,
reg: WritableReg,
temp_reg: WritableReg,
src_lane_size: OperandSize,
dst_lane_size: OperandSize,
) {
let scratch = writable!(regs::scratch_xmm());
self.asm
.xmm_vxorp_rrr(reg.to_reg(), reg.to_reg(), scratch, src_lane_size);
self.asm
.xmm_vmaxp_rrr(reg.to_reg(), scratch.to_reg(), reg, src_lane_size);
self.asm
.xmm_vpcmpeq_rrr(scratch, scratch.to_reg(), scratch.to_reg(), src_lane_size);
self.asm
.xmm_vpsrl_rr(scratch.to_reg(), scratch, 0x1, src_lane_size);
self.asm
.xmm_vcvt_rr(scratch.to_reg(), scratch, VcvtKind::I32ToF32);
self.asm
.xmm_vcvt_rr(reg.to_reg(), temp_reg, VcvtKind::F32ToI32);
self.asm
.xmm_vsub_rrr(reg.to_reg(), scratch.to_reg(), reg, dst_lane_size);
self.asm.xmm_vcmpp_rrr(
scratch,
scratch.to_reg(),
reg.to_reg(),
dst_lane_size,
VcmpKind::Le,
);
self.asm.xmm_vcvt_rr(reg.to_reg(), reg, VcvtKind::F32ToI32);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, reg.to_reg(), scratch.to_reg(), scratch);
self.asm
.xmm_vex_rr(AvxOpcode::Vpxor, reg.to_reg(), reg.to_reg(), reg);
self.asm
.xmm_vpmaxs_rrr(reg, scratch.to_reg(), reg.to_reg(), dst_lane_size);
self.asm
.xmm_vpadd_rrr(reg.to_reg(), temp_reg.to_reg(), reg, dst_lane_size);
}
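    /// Emits `i32x4.trunc_sat_f64x2_s_zero`. A self-equality mask turns the
    /// clamp constant 2147483647.0 (encoded below) into 0.0 in NaN lanes;
    /// `vminpd` then both clamps from above and maps NaN to zero, and
    /// `vcvttpd2dq` converts, zeroing the two upper lanes (values below
    /// INT32_MIN saturate via the conversion's 0x80000000 result).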
fn v128_trunc_sat_f64x2_s_zero(&mut self, reg: WritableReg, src_lane_size: OperandSize) {
let scratch = writable!(regs::scratch_xmm());
self.asm.xmm_vcmpp_rrr(
scratch,
reg.to_reg(),
reg.to_reg(),
src_lane_size,
VcmpKind::Eq,
);
let address = self.asm.add_constant(&[
0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF, 0xDF, 0x41, 0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xFF,
0xDF, 0x41,
]);
self.asm
.xmm_vandp_rrm(scratch.to_reg(), &address, scratch, src_lane_size);
self.asm
.xmm_vminp_rrr(reg.to_reg(), scratch.to_reg(), reg, src_lane_size);
self.asm.xmm_vcvt_rr(reg.to_reg(), reg, VcvtKind::F64ToI32);
}
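    /// Emits `i32x4.trunc_sat_f64x2_u_zero`. Inputs are clamped to
    /// [0.0, 4294967295.0] (NaN maps to zero via `vmaxpd`), truncated toward
    /// zero, and then the classic 2^52 trick is applied: adding 0x1.0p52
    /// places each integer value in the low 32 bits of its double, and a
    /// shuffle with a zeroed register packs those bits into the two low
    /// lanes.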
fn v128_trunc_sat_f64x2_u_zero(
&mut self,
reg: WritableReg,
src_lane_size: OperandSize,
dst_lane_size: OperandSize,
) {
let scratch = writable!(regs::scratch_xmm());
self.asm
.xmm_vxorp_rrr(scratch.to_reg(), scratch.to_reg(), scratch, src_lane_size);
self.asm
.xmm_vmaxp_rrr(reg.to_reg(), scratch.to_reg(), reg, src_lane_size);
let address = self.asm.add_constant(&[
0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF,
0xEF, 0x41,
]);
self.asm
.xmm_vminp_rrm(reg.to_reg(), &address, reg, src_lane_size);
self.asm
.xmm_vroundp_rri(reg.to_reg(), reg, VroundMode::TowardZero, src_lane_size);
let address = self.asm.add_constant(&[
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x30, 0x43,
]);
self.asm
.xmm_vaddp_rrm(reg.to_reg(), &address, reg, src_lane_size);
self.asm.xmm_vshufp_rrri(
reg.to_reg(),
scratch.to_reg(),
reg,
0b10_00_10_00,
dst_lane_size,
);
}
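    /// Replaces the NaN lanes of `dst` (marked by all-ones lanes in `mask`)
    /// with a canonical NaN pattern: the mask is shifted right so that only
    /// the sign, exponent, and quiet bits survive the following ANDN. Note
    /// that the `1 + ... + 1` arithmetic assumes `mantissa_bits()` returns
    /// the exponent width (8 for f32, 11 for f64), which is what makes the
    /// shift amount cover exactly sign + exponent + quiet bit.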
fn canonicalize_nans(&mut self, mask: WritableReg, dst: WritableReg, size: OperandSize) {
let amount_to_shift = 1 + size.mantissa_bits() + 1;
self.asm
.xmm_vpsrl_rr(mask.to_reg(), mask, amount_to_shift as u32, size);
self.asm
.xmm_vandnp_rrr(mask.to_reg(), dst.to_reg(), dst, size);
}
}