use crate::{
Assembler, AssemblerData, CHOICE_BOTH, CHOICE_LEFT, CHOICE_RIGHT, IMM_REG,
OFFSET, REGISTER_LIMIT, interval::IntervalAssembler, mmap::Mmap, reg,
};
use dynasmrt::{DynasmApi, DynasmError, DynasmLabelApi, dynasm};
use fidget_core::types::Interval;
/// Bytes reserved directly below `rbp` for saving the callee-saved GPRs
/// `r12`-`r15` (4 registers x 8 bytes); see `ensure_callee_regs_saved`.
const STACK_SIZE_UPPER: usize = 0x20;

/// Bytes of scratch at the bottom of the frame used to spill the low 64 bits
/// of `xmm4`-`xmm15` (12 registers x 8 bytes) around external calls; memory
/// slots are addressed above this region (see `build_load` / `build_store`).
const STACK_SIZE_LOWER: usize = 0x60;
// Interval-arithmetic evaluator backend for x86-64, emitted with dynasm.
//
// Data layout (visible from the code below): an interval is two packed `f32`
// values in the low 64 bits of an XMM register -- lane 0 is the lower bound,
// lane 1 the upper bound (`build_input` loads 8 bytes per slot with `vmovq`,
// and `vpshufd ..., 1` is used throughout to extract the upper bound).
//
// Pointer registers, inferred from the loads/stores below (confirm against
// the caller): rdi = input slots, rsi = choice-byte cursor (written and
// advanced by min/max/and/or), rdx = "simplification possible" flag byte,
// rcx = output slots.
//
// xmm0-xmm3 are used freely as scratch; slot registers are xmm4-xmm15
// (these are the ones spilled in `call_fn_unary` / `call_fn_binary`).
#[expect(clippy::useless_conversion)]
impl Assembler for IntervalAssembler {
type Data = Interval;
/// Emits the prologue and reserves the frame: `slot_count` memory slots plus
/// `STACK_SIZE_UPPER` bytes for callee-saved GPRs and `STACK_SIZE_LOWER`
/// bytes of XMM spill scratch.
fn init(mmap: Mmap, slot_count: usize) -> Self {
let mut out = AssemblerData::new(mmap);
dynasm!(out.ops
; push rbp
; mov rbp, rsp
);
out.prepare_stack(slot_count, STACK_SIZE_UPPER + STACK_SIZE_LOWER);
dynasm!(out.ops
// Zero the upper YMM halves; this file mixes VEX-encoded and legacy
// SSE instructions, so this avoids AVX/SSE transition penalties.
; vzeroupper
);
Self(out)
}
/// Loads the 64-bit interval in memory slot `src_mem` into `dst_reg`.
fn build_load(&mut self, dst_reg: u8, src_mem: u32) {
assert!((dst_reg as usize) < REGISTER_LIMIT);
// Memory slots live above the XMM call-spill scratch area.
let sp_offset: i32 = (self.0.stack_pos(src_mem)
+ STACK_SIZE_LOWER as u32)
.try_into()
.unwrap();
dynasm!(self.0.ops
; movq Rx(reg(dst_reg)), [rsp + sp_offset]
);
}
/// Stores the interval in `src_reg` to memory slot `dst_mem`.
fn build_store(&mut self, dst_mem: u32, src_reg: u8) {
assert!((src_reg as usize) < REGISTER_LIMIT);
// Same offset scheme as `build_load`: skip past the spill scratch.
let sp_offset: i32 = (self.0.stack_pos(dst_mem)
+ STACK_SIZE_LOWER as u32)
.try_into()
.unwrap();
dynasm!(self.0.ops
; movq [rsp + sp_offset], Rx(reg(src_reg))
);
}
/// Loads input interval `src_arg` (8 bytes each, base pointer in rdi).
fn build_input(&mut self, out_reg: u8, src_arg: u32) {
let pos = 8 * i32::try_from(src_arg).unwrap();
dynasm!(self.0.ops
; vmovq Rx(reg(out_reg)), [rdi + pos]
);
}
/// Writes the interval in `arg_reg` to output slot `out_index` (base in rcx).
fn build_output(&mut self, arg_reg: u8, out_index: u32) {
let pos = 8 * i32::try_from(out_index).unwrap();
dynasm!(self.0.ops
; vmovq [rcx + pos], Rx(reg(arg_reg))
);
}
// Transcendental ops are not open-coded: each wraps the corresponding
// `Interval` method in a sysv64 trampoline invoked via `call_fn_unary`.
// NOTE(review): the helpers below are named `float_*` even though they take
// `Interval`; consider renaming to `interval_*` for consistency with
// `interval_sin` / `interval_modulo`.
fn build_sin(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn interval_sin(v: Interval) -> Interval {
v.sin()
}
self.call_fn_unary(out_reg, lhs_reg, interval_sin);
}
fn build_cos(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_cos(f: Interval) -> Interval {
f.cos()
}
self.call_fn_unary(out_reg, lhs_reg, float_cos);
}
fn build_tan(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_tan(f: Interval) -> Interval {
f.tan()
}
self.call_fn_unary(out_reg, lhs_reg, float_tan);
}
fn build_asin(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_asin(f: Interval) -> Interval {
f.asin()
}
self.call_fn_unary(out_reg, lhs_reg, float_asin);
}
fn build_acos(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_acos(f: Interval) -> Interval {
f.acos()
}
self.call_fn_unary(out_reg, lhs_reg, float_acos);
}
fn build_atan(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_atan(f: Interval) -> Interval {
f.atan()
}
self.call_fn_unary(out_reg, lhs_reg, float_atan);
}
fn build_exp(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_exp(f: Interval) -> Interval {
f.exp()
}
self.call_fn_unary(out_reg, lhs_reg, float_exp);
}
fn build_ln(&mut self, out_reg: u8, lhs_reg: u8) {
extern "sysv64" fn float_ln(f: Interval) -> Interval {
f.ln()
}
self.call_fn_unary(out_reg, lhs_reg, float_ln);
}
/// Register-to-register interval copy (low 64 bits only).
fn build_copy(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
);
}
/// Negation: `-[lo, hi] = [-hi, -lo]`.
fn build_neg(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
// Swap lanes 0 and 1 (imm 0xF1), then flip sign bits with a
// broadcast 0x8000_0000 mask built from all-ones << 31.
; vpshufd Rx(reg(out_reg)), Rx(reg(lhs_reg)), 0b11110001u8 as i8
; pcmpeqd xmm0, xmm0 ; pslld xmm0, 31 ; vxorps Rx(reg(out_reg)), Rx(reg(out_reg)), xmm0
);
}
/// Absolute value, split on the sign of the interval:
/// - entirely positive: unchanged
/// - entirely negative (label N): `[-hi, -lo]`
/// - spanning zero (label S): `[0, max(|lo|, |hi|)]`
fn build_abs(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
; vpxor xmm0, xmm0, xmm0
// xmm1 lane 0 = upper bound
; vpshufd xmm1, Rx(reg(lhs_reg)), 1
// 0 > upper => entirely negative
; comiss xmm0, xmm1
; ja >N
// 0 > lower => spans zero
; comiss xmm0, Rx(reg(lhs_reg))
; ja >S
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
; jmp >E
; N:
// Negate (sign-mask XOR) and swap lanes: [-hi, -lo].
; vpcmpeqd xmm0, xmm0, xmm0 ; vpslld xmm0, xmm0, 31 ; vxorps xmm0, xmm0, Rx(reg(lhs_reg)) ; vpshufd Rx(reg(out_reg)), xmm0, 1 ; jmp >E
; S:
// Clear sign bits with 0x7FFF_FFFF mask (all-ones >> 1).
; vpcmpeqd xmm0, xmm0, xmm0 ; vpsrld xmm0, xmm0, 1
; vandps Rx(reg(out_reg)), Rx(reg(lhs_reg)), xmm0
// If |hi| <= |lo|, move |lo| into lane 1 so the max ends up there.
; vpshufd xmm0, Rx(reg(out_reg)), 0b11110001u8 as i8
; vcomiss xmm0, Rx(reg(out_reg)) ; ja >C
; vpshufd Rx(reg(out_reg)), Rx(reg(out_reg)), 0b11110011u8 as i8
; C:
// Force the lower bound (lane 0) to 0.
; xor eax, eax
; vpinsrd Rx(reg(out_reg)), Rx(reg(out_reg)), eax, 0
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Reciprocal. If the input interval contains 0, the result is the NaN
/// interval; otherwise `1/[lo, hi] = [1/hi, 1/lo]`.
fn build_recip(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
// lower > 0 => safely positive
; vpxor xmm0, xmm0, xmm0 ; vcomiss Rx(reg(lhs_reg)), xmm0
// 0 > upper => safely negative
; ja >O ; vpshufd xmm1, Rx(reg(lhs_reg)), 1 ; vcomiss xmm0, xmm1
; ja >O
// Contains zero: build quiet-NaN 0x7FC0_0000 in both lanes
// (all-ones << 23 = 0xFF80_0000, then >> 1).
; pcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg))
; pslld Rx(reg(out_reg)), 23
; psrld Rx(reg(out_reg)), 1
; jmp >E
// Build 1.0f (0x3F80_0000) in every lane: all-ones << 25 >> 2.
; O: ; pcmpeqw xmm0, xmm0
; pslld xmm0, 25
; psrld xmm0, 2
; vdivps Rx(reg(out_reg)), xmm0, Rx(reg(lhs_reg))
// Swap lanes so the bounds stay ordered: [1/hi, 1/lo].
; pshufd Rx(reg(out_reg)), Rx(reg(out_reg)), 0b0001
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Square root. A negative lower bound yields the NaN interval; otherwise
/// sqrt is applied lane-wise (monotonic, so bounds stay ordered).
fn build_sqrt(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
// 0 > lower => domain error
; vpxor xmm0, xmm0, xmm0 ; vcomiss xmm0, Rx(reg(lhs_reg))
; ja >L
; vsqrtps Rx(reg(out_reg)), Rx(reg(lhs_reg))
; jmp >E
; L:
// Quiet-NaN interval (same trick as `build_recip`).
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Squaring, split on sign:
/// - entirely positive: `[lo^2, hi^2]`
/// - entirely negative (N): `[hi^2, lo^2]` (lane swap)
/// - spanning zero (S): `[0, max(lo^2, hi^2)]`
fn build_square(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
// xmm2 = lane-wise squares
; vmulps xmm2, Rx(reg(lhs_reg)), Rx(reg(lhs_reg))
; vpxor xmm0, xmm0, xmm0 ; vpshufd xmm1, Rx(reg(lhs_reg)), 1
// 0 > upper => entirely negative
; vcomiss xmm0, xmm1
// 0 > lower => spans zero
; ja >N ; vcomiss xmm0, Rx(reg(lhs_reg))
; ja >S
; vmovq Rx(reg(out_reg)), xmm2
; jmp >E
; N:
// Swap lanes 0 and 1.
; vpshufd Rx(reg(out_reg)), xmm2, 0b11110001u8 as i8
; jmp >E
; S:
// max(lo^2, hi^2) into lane 0, then shift it up into lane 1,
// leaving 0 in lane 0.
; vpshufd Rx(reg(out_reg)), xmm2, 1
; vmaxss Rx(reg(out_reg)), Rx(reg(out_reg)), xmm2
; vpsllq Rx(reg(out_reg)), Rx(reg(out_reg)), 32
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Floor, lane-wise (`vroundps` imm 1 = round toward -inf; monotonic).
fn build_floor(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
; vroundps Rx(reg(out_reg)), Rx(reg(lhs_reg)), 1
);
}
/// Ceiling, lane-wise (`vroundps` imm 2 = round toward +inf; monotonic).
fn build_ceil(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
; vroundps Rx(reg(out_reg)), Rx(reg(lhs_reg)), 2
);
}
/// Rounding, lane-wise, half-away-from-zero: add a sign-matched
/// almost-0.5 and truncate. (`vroundps` imm 3 = round toward zero.)
fn build_round(&mut self, out_reg: u8, lhs_reg: u8) {
dynasm!(self.0.ops
// xmm1 = sign bit of each input lane
; mov eax, 0x80000000u32 as i32
; vmovd xmm1, eax
; vbroadcastss xmm1, xmm1
; vandps xmm1, xmm1, Rx(reg(lhs_reg))
// 0x3EFFFFFF = largest f32 strictly below 0.5; OR in the sign so
// each lane gets +/- (0.5 - ulp), then truncate.
; mov eax, 0x3effffffu32 as i32
; vmovd xmm2, eax
; vbroadcastss xmm2, xmm2
; vorps xmm1, xmm1, xmm2
; vaddps Rx(reg(out_reg)), xmm1, Rx(reg(lhs_reg))
; vroundps Rx(reg(out_reg)), Rx(reg(out_reg)), 3
);
}
/// Addition: lane-wise `[lo + lo', hi + hi']`.
fn build_add(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
; vaddps Rx(reg(out_reg)), Rx(reg(lhs_reg)), Rx(reg(rhs_reg))
);
}
/// Subtraction: `[lo - hi', hi - lo']`, via swapping the rhs lanes first.
fn build_sub(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b11110001u8 as i8
; vsubps Rx(reg(out_reg)), Rx(reg(lhs_reg)), xmm1
);
}
/// Multiplication: form all four corner products `{lo,hi} x {lo',hi'}`
/// in xmm2, then horizontal-min into lane 0 and horizontal-max into
/// lane 1 of the output.
fn build_mul(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
// lhs shuffled to [hi, lo, lo, hi], rhs to [hi', lo', hi', lo'];
// their product covers all four bound combinations.
; vpshufd xmm2, Rx(reg(lhs_reg)), 0b01000001_i8
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b00010001_i8
; vmulps xmm2, xmm2, xmm1
// Horizontal min of the four products -> output lane 0.
; vpshufd xmm1, xmm2, 0b00001110 ; vminps xmm1, xmm1, xmm2 ; vpshufd Rx(reg(out_reg)), xmm1, 0b00000001 ; vminss Rx(reg(out_reg)), Rx(reg(out_reg)), xmm1
// Horizontal max of the four products -> xmm2 lane 0.
; vpshufd xmm1, xmm2, 0b00001110 ; vmaxps xmm1, xmm1, xmm2 ; vpshufd xmm2, xmm1, 0b00000001 ; vmaxss xmm2, xmm2, xmm1
// Interleave: [min, max].
; vunpcklps Rx(reg(out_reg)), Rx(reg(out_reg)), xmm2
);
}
/// Division. If the divisor interval contains 0, the result is the NaN
/// interval; otherwise same four-corner min/max scheme as `build_mul`.
fn build_div(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
// rhs.lower > 0 => safe; else 0 > rhs.upper => safe; else NaN.
; vpxor xmm1, xmm1, xmm1 ; vcomiss Rx(reg(rhs_reg)), xmm1
; ja >O ; vpshufd xmm2, Rx(reg(rhs_reg)), 1
; vcomiss xmm1, xmm2
; ja >O
// Quiet-NaN interval (all-ones << 23 >> 1 = 0x7FC0_0000).
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; O:
// Four corner quotients, then horizontal min/max as in `build_mul`.
; vpshufd xmm2, Rx(reg(lhs_reg)), 0b01000001_i8
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b00010001_i8
; vdivps xmm2, xmm2, xmm1
; vpshufd xmm1, xmm2, 0b00001110 ; vminps xmm1, xmm1, xmm2 ; vpshufd Rx(reg(out_reg)), xmm1, 0b00000001 ; vminss Rx(reg(out_reg)), Rx(reg(out_reg)), xmm1
; vpshufd xmm1, xmm2, 0b00001110 ; vmaxps xmm1, xmm1, xmm2 ; vpshufd xmm2, xmm1, 0b00000001 ; vmaxss xmm2, xmm2, xmm1
; vunpcklps Rx(reg(out_reg)), Rx(reg(out_reg)), xmm2
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Interval max with choice tracking:
/// - NaN operand (unordered compare sets PF, caught by `jp`): NaN output,
///   record CHOICE_BOTH
/// - lhs.hi < rhs.lo: rhs wins (CHOICE_RIGHT) and the simplify flag at
///   [rdx] is set
/// - rhs.hi < lhs.lo: lhs wins (CHOICE_LEFT), simplify flag set
/// - overlapping: lane-wise max, CHOICE_BOTH
/// The choice cursor rsi advances by one byte per operation.
fn build_max(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
// xmm1 lane 0 = lhs.hi; lhs.hi < rhs.lo => take rhs.
; vpshufd xmm1, Rx(reg(lhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(rhs_reg)) ; jp >N ; jb >R
// rhs.hi < lhs.lo => take lhs.
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(lhs_reg))
; jp >N
; jb >L
; vmaxps Rx(reg(out_reg)), Rx(reg(lhs_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_BOTH as i8
; jmp >E
; N:
; or [rsi], CHOICE_BOTH as i8
// Quiet-NaN interval.
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; L:
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
; or [rsi], CHOICE_LEFT as i8
; or [rdx], 1i8
; jmp >E
; R:
; vmovq Rx(reg(out_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_RIGHT as i8
; or [rdx], 1i8
; E:
; add rsi, 1
);
self.0.ops.commit_local().unwrap();
}
/// Interval min with choice tracking; mirror image of `build_max`
/// (lhs.hi < rhs.lo now means lhs wins).
fn build_min(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
// lhs.hi < rhs.lo => take lhs.
; vpshufd xmm1, Rx(reg(lhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(rhs_reg)) ; jp >N
; jb >L
// rhs.hi < lhs.lo => take rhs.
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(lhs_reg))
; jp >N
; jb >R
; vminps Rx(reg(out_reg)), Rx(reg(lhs_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_BOTH as i8
; jmp >E
; N:
; or [rsi], CHOICE_BOTH as i8
// Quiet-NaN interval.
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; L:
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
; or [rsi], CHOICE_LEFT as i8
; or [rdx], 1i8
; jmp >E
; R:
; vmovq Rx(reg(out_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_RIGHT as i8
; or [rdx], 1i8
; E:
; add rsi, 1
);
self.0.ops.commit_local().unwrap();
}
/// Euclidean remainder, delegated to `Interval::rem_euclid` via trampoline.
fn build_mod(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
extern "sysv64" fn interval_modulo(
lhs: Interval,
rhs: Interval,
) -> Interval {
lhs.rem_euclid(rhs)
}
self.call_fn_binary(out_reg, lhs_reg, rhs_reg, interval_modulo);
}
/// Two-argument arctangent, delegated to `Interval::atan2` via trampoline.
fn build_atan2(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
extern "sysv64" fn interval_atan2(
lhs: Interval,
rhs: Interval,
) -> Interval {
lhs.atan2(rhs)
}
self.call_fn_binary(out_reg, lhs_reg, rhs_reg, interval_atan2);
}
/// Logical NOT of an interval, computed branchlessly from two masks:
/// lane 0 = 1.0 iff the input is exactly [0, 0], lane 1 = 1.0 iff the
/// input contains 0 (i.e. not (lo > 0 or hi < 0)).
fn build_not(&mut self, out_reg: u8, arg_reg: u8) {
dynasm!(self.0.ops
; vpxor xmm0, xmm0, xmm0
// xmm1 lane 0 = upper bound
; vpshufd xmm1, Rx(reg(arg_reg)), 0b11111101u8 as i8
// xmm2 = mask "interval excludes 0" (lo > 0 || hi < 0)
; vcmpgtss xmm3, Rx(reg(arg_reg)), xmm0 ; vcmpltss xmm2, xmm1, xmm0 ; vorps xmm2, xmm2, xmm3
// invert: xmm2 = mask "interval contains 0"
; vpcmpeqd xmm3, xmm3, xmm3 ; vxorpd xmm2, xmm2, xmm3
// xmm3 = mask "interval is exactly [0, 0]"
; vcmpeqss xmm3, Rx(reg(arg_reg)), xmm0
; vcmpeqss xmm1, xmm1, xmm0
; vandps xmm3, xmm1, xmm3
// Convert masks to 0.0 / 1.0 and interleave into [lo, hi].
; mov eax, 1f32.to_bits() as i32
; vmovd xmm0, eax
; vandps xmm3, xmm3, xmm0
; vandps xmm2, xmm0, xmm2
; vunpcklps Rx(reg(out_reg)), xmm3, xmm2
);
}
/// Logical AND with choice tracking:
/// - either side NaN: NaN output + CHOICE_BOTH
/// - lhs definitely truthy (excludes 0): result = rhs, CHOICE_RIGHT
/// - lhs exactly [0, 0]: result = lhs, CHOICE_LEFT
/// - ambiguous: CHOICE_BOTH, result = [min(rhs.lo, 0), max(rhs.hi, 0)]
/// Mask-to-branch trick: an all-ones compare mask is a NaN bit pattern,
/// so `vcomiss 0, mask` sets PF exactly when the mask is set.
fn build_and(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
assert_ne!(reg(lhs_reg), IMM_REG);
dynasm!(self.0.ops
// Self-compare is unordered (PF set) iff the lower bound is NaN.
; vcomiss Rx(reg(lhs_reg)), Rx(reg(lhs_reg))
; jp >N
; vcomiss Rx(reg(rhs_reg)), Rx(reg(rhs_reg))
; jnp >M
; N:
; or [rsi], CHOICE_BOTH as i8
// Quiet-NaN interval.
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; M:
; vpxor xmm1, xmm1, xmm1
// xmm2 = "lhs excludes 0" mask; mask set => PF set => fall through.
; vcmpgtss xmm3, Rx(reg(lhs_reg)), xmm1 ; vpshufd xmm2, Rx(reg(lhs_reg)), 0b11111101u8 as i8 ; vcmpltss xmm2, xmm2, xmm1 ; vorps xmm2, xmm2, xmm3 ; vcomiss xmm1, xmm2 ; jnp >A
; vmovq Rx(reg(out_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_RIGHT as i8
; or [rdx], 1i8
; jmp >E
; A:
// xmm3 = "lhs is exactly [0, 0]" mask, tested the same way.
; vcmpeqss xmm3, Rx(reg(lhs_reg)), xmm1
; vpshufd xmm2, Rx(reg(lhs_reg)), 0b11111101u8 as i8 ; vcmpeqss xmm2, xmm2, xmm1
; vandps xmm3, xmm2, xmm3
; vcomiss xmm1, xmm3
; jnp >C
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
; or [rsi], CHOICE_LEFT as i8
; or [rdx], 1i8
; jmp >E
; C:
; or [rsi], CHOICE_BOTH as i8
// Widen rhs to include 0: [min(rhs.lo, 0), max(rhs.hi, 0)].
; vpshufd xmm2, Rx(reg(rhs_reg)), 0b11111101u8 as i8 ; vmaxss xmm2, xmm2, xmm1 ; vminss xmm1, Rx(reg(rhs_reg)), xmm1 ; vunpcklps Rx(reg(out_reg)), xmm1, xmm2
; E: ; add rsi, 1
);
self.0.ops.commit_local().unwrap();
}
/// Logical OR with choice tracking; dual of `build_and`:
/// - either side NaN: NaN output + CHOICE_BOTH
/// - lhs definitely truthy: result = lhs, CHOICE_LEFT
/// - lhs exactly [0, 0]: result = rhs, CHOICE_RIGHT
/// - ambiguous: CHOICE_BOTH, result = [min(lhs.lo, rhs.lo), max(lhs.hi, rhs.hi)]
fn build_or(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
assert_ne!(reg(lhs_reg), IMM_REG);
dynasm!(self.0.ops
; vcomiss Rx(reg(lhs_reg)), Rx(reg(lhs_reg))
; jp >N
; vcomiss Rx(reg(rhs_reg)), Rx(reg(rhs_reg))
; jnp >M
; N:
; or [rsi], CHOICE_BOTH as i8
// Quiet-NaN interval.
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; M:
; vpxor xmm1, xmm1, xmm1
// "lhs excludes 0" mask, tested via the PF trick (see `build_and`).
; vcmpgtss xmm3, Rx(reg(lhs_reg)), xmm1 ; vpshufd xmm2, Rx(reg(lhs_reg)), 0b11111101u8 as i8 ; vcmpltss xmm2, xmm2, xmm1 ; vorps xmm2, xmm2, xmm3 ; vcomiss xmm1, xmm2 ; jnp >A
; vmovq Rx(reg(out_reg)), Rx(reg(lhs_reg))
; or [rsi], CHOICE_LEFT as i8
; or [rdx], 1i8
; jmp >E
; A:
// "lhs is exactly [0, 0]" mask.
; vcmpeqss xmm3, Rx(reg(lhs_reg)), xmm1
; vpshufd xmm2, Rx(reg(lhs_reg)), 0b11111101u8 as i8 ; vcmpeqss xmm2, xmm2, xmm1
; vandps xmm3, xmm2, xmm3
; vcomiss xmm1, xmm3
; jnp >C
; vmovq Rx(reg(out_reg)), Rx(reg(rhs_reg))
; or [rsi], CHOICE_RIGHT as i8
; or [rdx], 1i8
; jmp >E
; C:
; or [rsi], CHOICE_BOTH as i8
// Union-ish result: [min(lhs.lo, rhs.lo), max(lhs.hi, rhs.hi)].
; vpshufd xmm2, Rx(reg(lhs_reg)), 0b11111101u8 as i8 ; vpshufd xmm1, Rx(reg(rhs_reg)), 0b11111101u8 as i8 ; vmaxss xmm1, xmm1, xmm2 ; vminss xmm2, Rx(reg(lhs_reg)), Rx(reg(rhs_reg))
; vunpcklps Rx(reg(out_reg)), xmm2, xmm1
; E: ; add rsi, 1
);
self.0.ops.commit_local().unwrap();
}
/// Three-way comparison:
/// - lhs strictly below rhs: [-1, -1]
/// - rhs strictly below lhs: [1, 1]
/// - overlapping: [-1, 1] (ambiguous)
/// - NaN operand: NaN interval
fn build_compare(&mut self, out_reg: u8, lhs_reg: u8, rhs_reg: u8) {
dynasm!(self.0.ops
// lhs.hi < rhs.lo => strictly less.
; vpshufd xmm1, Rx(reg(lhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(rhs_reg)) ; jp >N
; jb >L
// rhs.hi < lhs.lo => strictly greater.
; vpshufd xmm1, Rx(reg(rhs_reg)), 0b11111101u8 as i8
; vcomiss xmm1, Rx(reg(lhs_reg))
; jp >N
; jb >R
// Overlap: lane 0 = -1.0, lane 1 = +1.0.
; mov eax, (-1f32).to_bits() as i32
; vpinsrd Rx(reg(out_reg)), Rx(reg(out_reg)), eax, 0
; mov eax, 1f32.to_bits() as i32
; vpinsrd Rx(reg(out_reg)), Rx(reg(out_reg)), eax, 1
; jmp >E
; N:
// Quiet-NaN interval.
; vpcmpeqw Rx(reg(out_reg)), Rx(reg(out_reg)), Rx(reg(out_reg))
; vpslld Rx(reg(out_reg)), Rx(reg(out_reg)), 23
; vpsrld Rx(reg(out_reg)), Rx(reg(out_reg)), 1
; jmp >E
; L:
; mov eax, (-1f32).to_bits() as i32
; vmovd xmm1, eax
; vbroadcastss Rx(reg(out_reg)), xmm1
; jmp >E
; R:
; mov eax, 1f32.to_bits() as i32
; vmovd xmm1, eax
; vbroadcastss Rx(reg(out_reg)), xmm1
; E:
);
self.0.ops.commit_local().unwrap();
}
/// Broadcasts the immediate into both lanes of the dedicated immediate
/// register and returns its virtual register index.
fn load_imm(&mut self, imm: f32) -> u8 {
let imm_u32 = imm.to_bits();
dynasm!(self.0.ops
; mov eax, imm_u32 as i32
; vmovd Rx(IMM_REG), eax
; vbroadcastss Rx(IMM_REG), Rx(IMM_REG)
);
IMM_REG.wrapping_sub(OFFSET)
}
/// Emits the epilogue: restores r12-r15 from the slots written by
/// `ensure_callee_regs_saved` (only if they were saved), then delegates
/// to the shared `AssemblerData::finalize`.
fn finalize(mut self) -> Result<Mmap, DynasmError> {
if self.0.saved_callee_regs {
dynasm!(self.0.ops
; mov r12, [rbp - 0x8]
; mov r13, [rbp - 0x10]
; mov r14, [rbp - 0x18]
; mov r15, [rbp - 0x20]
);
}
self.0.finalize()
}
}
// Helpers for calling out from JIT'd code into Rust `Interval` routines.
#[expect(clippy::useless_conversion)]
impl IntervalAssembler {
/// Lazily saves r12-r15 (callee-saved in sysv64) into the
/// `STACK_SIZE_UPPER` area below rbp, exactly once per function; they are
/// restored in `finalize`. The call trampolines below use them to hold
/// the rdi/rsi/rdx/rcx context across the external call.
fn ensure_callee_regs_saved(&mut self) {
if !self.0.saved_callee_regs {
dynasm!(self.0.ops
; mov [rbp - 0x8], r12
; mov [rbp - 0x10], r13
; mov [rbp - 0x18], r14
; mov [rbp - 0x20], r15
);
self.0.saved_callee_regs = true
}
}
/// Emits a call to `f(arg) -> Interval`:
/// 1. stash rdi/rsi/rdx/rcx (caller-saved context pointers) in r12-r15
/// 2. spill the low 64 bits of xmm4-xmm15 (the live interval slots;
///    intervals only occupy 64 bits, hence `vmovsd`) into the
///    `STACK_SIZE_LOWER` scratch at the bottom of the frame
/// 3. call through rsi (safe: rsi's original value is already in r13)
/// 4. restore everything and move the result from xmm0 into `out_reg`
fn call_fn_unary(
&mut self,
out_reg: u8,
arg_reg: u8,
f: extern "sysv64" fn(Interval) -> Interval,
) {
self.ensure_callee_regs_saved();
let addr = f as usize;
dynasm!(self.0.ops
// Preserve context pointers across the call.
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx
// Spill slot registers (xmm are caller-saved in sysv64).
; vmovsd [rsp], xmm4
; vmovsd [rsp + 0x08], xmm5
; vmovsd [rsp + 0x10], xmm6
; vmovsd [rsp + 0x18], xmm7
; vmovsd [rsp + 0x20], xmm8
; vmovsd [rsp + 0x28], xmm9
; vmovsd [rsp + 0x30], xmm10
; vmovsd [rsp + 0x38], xmm11
; vmovsd [rsp + 0x40], xmm12
; vmovsd [rsp + 0x48], xmm13
; vmovsd [rsp + 0x50], xmm14
; vmovsd [rsp + 0x58], xmm15
// First (and only) float argument goes in xmm0.
; vmovq xmm0, Rx(reg(arg_reg))
; mov rsi, QWORD addr as _
; call rsi
// Restore slot registers and context pointers.
; vmovsd xmm4, [rsp]
; vmovsd xmm5, [rsp + 0x08]
; vmovsd xmm6, [rsp + 0x10]
; vmovsd xmm7, [rsp + 0x18]
; vmovsd xmm8, [rsp + 0x20]
; vmovsd xmm9, [rsp + 0x28]
; vmovsd xmm10, [rsp + 0x30]
; vmovsd xmm11, [rsp + 0x38]
; vmovsd xmm12, [rsp + 0x40]
; vmovsd xmm13, [rsp + 0x48]
; vmovsd xmm14, [rsp + 0x50]
; vmovsd xmm15, [rsp + 0x58]
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15
// Result comes back in xmm0.
; vmovq Rx(reg(out_reg)), xmm0
);
}
/// Emits a call to `f(lhs, rhs) -> Interval`; identical save/restore
/// scheme as `call_fn_unary`, with the two arguments passed in xmm0/xmm1.
/// Note rhs is loaded into xmm1 *before* lhs into xmm0, so an `out_reg`
/// alias cannot clobber an argument early.
fn call_fn_binary(
&mut self,
out_reg: u8,
lhs_reg: u8,
rhs_reg: u8,
f: extern "sysv64" fn(Interval, Interval) -> Interval,
) {
self.ensure_callee_regs_saved();
let addr = f as usize;
dynasm!(self.0.ops
; mov r12, rdi
; mov r13, rsi
; mov r14, rdx
; mov r15, rcx
; vmovsd [rsp], xmm4
; vmovsd [rsp + 0x08], xmm5
; vmovsd [rsp + 0x10], xmm6
; vmovsd [rsp + 0x18], xmm7
; vmovsd [rsp + 0x20], xmm8
; vmovsd [rsp + 0x28], xmm9
; vmovsd [rsp + 0x30], xmm10
; vmovsd [rsp + 0x38], xmm11
; vmovsd [rsp + 0x40], xmm12
; vmovsd [rsp + 0x48], xmm13
; vmovsd [rsp + 0x50], xmm14
; vmovsd [rsp + 0x58], xmm15
; vmovq xmm1, Rx(reg(rhs_reg))
; vmovq xmm0, Rx(reg(lhs_reg))
; mov rsi, QWORD addr as _
; call rsi
; vmovsd xmm4, [rsp]
; vmovsd xmm5, [rsp + 0x08]
; vmovsd xmm6, [rsp + 0x10]
; vmovsd xmm7, [rsp + 0x18]
; vmovsd xmm8, [rsp + 0x20]
; vmovsd xmm9, [rsp + 0x28]
; vmovsd xmm10, [rsp + 0x30]
; vmovsd xmm11, [rsp + 0x38]
; vmovsd xmm12, [rsp + 0x40]
; vmovsd xmm13, [rsp + 0x48]
; vmovsd xmm14, [rsp + 0x50]
; vmovsd xmm15, [rsp + 0x58]
; mov rdi, r12
; mov rsi, r13
; mov rdx, r14
; mov rcx, r15
; vmovq Rx(reg(out_reg)), xmm0
);
}
}