use core::{arch::asm, ops::*};
/// Transparent newtype around a floating-point value `F`.
///
/// The inner value is public and reachable through `.0`. Copying, equality and ordering are
/// derived and therefore defer to `F`'s own implementations. Elsewhere in this file the
/// arithmetic operator traits are implemented for `Normal<f32>` and `Normal<f64>` using inline
/// assembly that runs each operation with `MASK` OR-ed into the floating-point control register.
#[repr(transparent)]
#[derive(Clone, Copy, PartialOrd, PartialEq)]
pub struct Normal<F>(pub F);
/// Bits OR-ed into MXCSR around every operation on x86 / x86-64: bit 15 and bit 6 (`0x8040`).
/// Per the Intel SDM these are the flush-to-zero (FTZ) and denormals-are-zero (DAZ) controls.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
const MASK: u32 = (1 << 15) | (1 << 6);
/// Bit OR-ed into FPCR around every operation on AArch64: bit 24, which the Arm architecture
/// reference names FZ (flush-to-zero).
#[cfg(target_arch = "aarch64")]
const MASK: u64 = 0x0100_0000;
/// Implements a by-value binary operator trait (`Add`, `Sub`, `Mul`, `Div`, ...) for
/// `Normal<$raw>`.
///
/// Arguments, in order (the trailing comma is required):
/// * `$op` / `$fn`  - the operator trait and its method name;
/// * `$raw`         - the wrapped float type (`f32` or `f64`);
/// * `$x86_instr`   - x86 instruction template; it may reference `{x}` and `{y}` and must leave
///                    the result in `{x}`;
/// * `$arm_instr`   - AArch64 instruction template; it may reference `{res}`, `{x}` and `{y}`.
///
/// The generated method saves the floating-point control register (MXCSR on x86, FPCR on
/// AArch64), ORs `MASK` into it, executes the single arithmetic instruction, and restores the
/// saved value before the `asm!` block ends. It expects `Normal`, `MASK` and `asm!` to be in
/// scope at the expansion site.
macro_rules! impl_op {
    ($op:ident, $fn:ident, $raw:ty, $x86_instr:literal, $arm_instr:literal,) => {
        impl $op for Normal<$raw> {
            type Output = Self;

            fn $fn(self, rhs: Self) -> Self {
                let Self(x) = self;
                let Self(y) = rhs;
                // Assigned exactly once, by whichever `asm!` block is compiled in.
                let res: $raw;
                // SAFETY: the assembly only touches its declared operands. On x86 it writes to
                // the two local `u32` scratch slots through valid `*mut u32` pointers that
                // outlive the block; on every path the control register is restored to the
                // value saved at entry before the block ends.
                unsafe {
                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    {
                        // STMXCSR/LDMXCSR only accept memory operands, so the register is
                        // round-tripped through these stack slots. The asm stores into them,
                        // hence `mut` bindings handed over as raw mutable pointers.
                        let mut mxcsr_save: u32 = 0;
                        let mut mxcsr_tweak: u32 = 0;
                        asm!(
                            // No width modifier on the pointer operands: the default is the
                            // pointer-sized register on both x86 and x86-64.
                            "stmxcsr [{mxcsr_save}]",
                            "stmxcsr [{mxcsr_tweak}]",
                            "or DWORD PTR [{mxcsr_tweak}], {mask}",
                            "ldmxcsr [{mxcsr_tweak}]",
                            $x86_instr,
                            "ldmxcsr [{mxcsr_save}]",
                            // `{a}` appears only inside an assembler comment so that the
                            // scratch operand below counts as used by the template.
                            "/* {a} */",
                            x = inout(xmm_reg) x => res,
                            y = in(xmm_reg) y,
                            a = out(xmm_reg) _,
                            mxcsr_save = in(reg) &mut mxcsr_save as *mut u32,
                            mxcsr_tweak = in(reg) &mut mxcsr_tweak as *mut u32,
                            mask = const MASK,
                        );
                    }
                    #[cfg(target_arch = "aarch64")]
                    asm!(
                        "mrs {fpcr_save:x}, fpcr",
                        "mrs {fpcr_tweak:x}, fpcr",
                        "orr {fpcr_tweak:x}, {fpcr_tweak:x}, {mask}",
                        "msr fpcr, {fpcr_tweak:x}",
                        $arm_instr,
                        "msr fpcr, {fpcr_save:x}",
                        // See the x86 branch: keeps the scratch operand referenced.
                        "/* {a} */",
                        res = lateout(vreg) res,
                        x = in(vreg) x,
                        y = in(vreg) y,
                        a = out(vreg) _,
                        fpcr_save = out(reg) _,
                        fpcr_tweak = out(reg) _,
                        mask = const MASK,
                    );
                }
                Self(res)
            }
        }
    };
}
/// Implements a compound-assignment operator trait (`AddAssign`, `SubAssign`, ...) for
/// `Normal<$raw>`.
///
/// Arguments, in order (the trailing comma is required):
/// * `$op` / `$fn`  - the operator trait and its method name;
/// * `$raw`         - the wrapped float type (`f32` or `f64`);
/// * `$x86_instr`   - x86 instruction template; it may reference `{x}` and `{y}` and must leave
///                    the result in `{x}`;
/// * `$arm_instr`   - AArch64 instruction template; it may reference `{x}` and `{y}` and must
///                    leave the result in `{x}`.
///
/// The generated method saves the floating-point control register (MXCSR on x86, FPCR on
/// AArch64), ORs `MASK` into it, executes the single arithmetic instruction in place on the
/// wrapped value, and restores the saved register value before the `asm!` block ends. It
/// expects `Normal`, `MASK` and `asm!` to be in scope at the expansion site.
macro_rules! impl_op_assign {
    ($op:ident, $fn:ident, $raw:ty, $x86_instr:literal, $arm_instr:literal,) => {
        impl $op for Normal<$raw> {
            fn $fn(&mut self, rhs: Self) {
                // `self` is `&mut Self`, so `x` binds as `&mut $raw`; the asm operands below
                // must therefore use the place `*x`, not the reference itself.
                let Self(x) = self;
                let Self(y) = rhs;
                // SAFETY: the assembly only touches its declared operands. On x86 it writes to
                // the two local `u32` scratch slots through valid `*mut u32` pointers that
                // outlive the block; on every path the control register is restored to the
                // value saved at entry before the block ends.
                unsafe {
                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    {
                        // STMXCSR/LDMXCSR only accept memory operands, so the register is
                        // round-tripped through these stack slots. The asm stores into them,
                        // hence `mut` bindings handed over as raw mutable pointers.
                        let mut mxcsr_save: u32 = 0;
                        let mut mxcsr_tweak: u32 = 0;
                        asm!(
                            // No width modifier on the pointer operands: the default is the
                            // pointer-sized register on both x86 and x86-64.
                            "stmxcsr [{mxcsr_save}]",
                            "stmxcsr [{mxcsr_tweak}]",
                            "or DWORD PTR [{mxcsr_tweak}], {mask}",
                            "ldmxcsr [{mxcsr_tweak}]",
                            $x86_instr,
                            "ldmxcsr [{mxcsr_save}]",
                            // `{a}` appears only inside an assembler comment so that the
                            // scratch operand below counts as used by the template.
                            "/* {a} */",
                            x = inout(xmm_reg) *x,
                            y = in(xmm_reg) y,
                            a = out(xmm_reg) _,
                            mxcsr_save = in(reg) &mut mxcsr_save as *mut u32,
                            mxcsr_tweak = in(reg) &mut mxcsr_tweak as *mut u32,
                            mask = const MASK,
                        );
                    }
                    #[cfg(target_arch = "aarch64")]
                    asm!(
                        "mrs {fpcr_save:x}, fpcr",
                        "mrs {fpcr_tweak:x}, fpcr",
                        "orr {fpcr_tweak:x}, {fpcr_tweak:x}, {mask}",
                        "msr fpcr, {fpcr_tweak:x}",
                        $arm_instr,
                        "msr fpcr, {fpcr_save:x}",
                        // See the x86 branch: keeps the scratch operand referenced.
                        "/* {a} */",
                        x = inout(vreg) *x,
                        y = in(vreg) y,
                        a = out(vreg) _,
                        fpcr_save = out(reg) _,
                        fpcr_tweak = out(reg) _,
                        mask = const MASK,
                    );
                }
            }
        }
    };
}
// Operator implementations for `Normal<f32>` and `Normal<f64>`.
//
// Each invocation passes: operator trait, method name, wrapped float type, the x86 scalar SSE
// instruction (`..ss` for `f32`, `..sd` for `f64`), and the AArch64 instruction (`:s` register
// view for `f32`, `:d` for `f64`). The trailing comma is required by the macro patterns.

// Addition: `+` and `+=`.
impl_op!(
Add,
add,
f32,
"addss {x}, {y}",
"fadd {res:s}, {x:s}, {y:s}",
);
impl_op!(
Add,
add,
f64,
"addsd {x}, {y}",
"fadd {res:d}, {x:d}, {y:d}",
);
impl_op_assign!(
AddAssign,
add_assign,
f32,
"addss {x}, {y}",
"fadd {x:s}, {x:s}, {y:s}",
);
impl_op_assign!(
AddAssign,
add_assign,
f64,
"addsd {x}, {y}",
"fadd {x:d}, {x:d}, {y:d}",
);
// Subtraction: `-` and `-=`.
impl_op!(
Sub,
sub,
f32,
"subss {x}, {y}",
"fsub {res:s}, {x:s}, {y:s}",
);
impl_op!(
Sub,
sub,
f64,
"subsd {x}, {y}",
"fsub {res:d}, {x:d}, {y:d}",
);
impl_op_assign!(
SubAssign,
sub_assign,
f32,
"subss {x}, {y}",
"fsub {x:s}, {x:s}, {y:s}",
);
impl_op_assign!(
SubAssign,
sub_assign,
f64,
"subsd {x}, {y}",
"fsub {x:d}, {x:d}, {y:d}",
);
// Multiplication: `*` and `*=`.
impl_op!(
Mul,
mul,
f32,
"mulss {x}, {y}",
"fmul {res:s}, {x:s}, {y:s}",
);
impl_op!(
Mul,
mul,
f64,
"mulsd {x}, {y}",
"fmul {res:d}, {x:d}, {y:d}",
);
impl_op_assign!(
MulAssign,
mul_assign,
f32,
"mulss {x}, {y}",
"fmul {x:s}, {x:s}, {y:s}",
);
impl_op_assign!(
MulAssign,
mul_assign,
f64,
"mulsd {x}, {y}",
"fmul {x:d}, {x:d}, {y:d}",
);
// Division: `/` and `/=`.
impl_op!(
Div,
div,
f32,
"divss {x}, {y}",
"fdiv {res:s}, {x:s}, {y:s}",
);
impl_op!(
Div,
div,
f64,
"divsd {x}, {y}",
"fdiv {res:d}, {x:d}, {y:d}",
);
impl_op_assign!(
DivAssign,
div_assign,
f32,
"divss {x}, {y}",
"fdiv {x:s}, {x:s}, {y:s}",
);
impl_op_assign!(
DivAssign,
div_assign,
f64,
"divsd {x}, {y}",
"fdiv {x:d}, {x:d}, {y:d}",
);