normal_floats 0.2.0

Floats that flush denormal numbers to zero, implemented without undefined behaviour (UB)
Documentation
use core::{arch::asm, ops::*};

/// Transparent wrapper around a raw floating-point value `F`.
///
/// The arithmetic operator traits implemented for `Normal<f32>` and
/// `Normal<f64>` in this file run the underlying hardware instruction with
/// the CPU's flush-to-zero control bits temporarily enabled, and restore the
/// previous floating-point control state afterwards.
///
/// The inner value is public, so it can be read or replaced directly via `.0`.
#[derive(Copy, Clone, Debug, Default, PartialEq, PartialOrd)]
#[repr(transparent)]
pub struct Normal<F>(pub F);

// MXCSR control bits OR-ed in around each operation:
// flush-to-zero (FTZ, bit 15) and denormals-are-zero (DAZ, bit 6).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const MASK: u32 = (1 << 15) | (1 << 6);

// FPCR flush-to-zero control bit (FZ, bit 24).
#[cfg(target_arch = "aarch64")]
const MASK: u64 = 0x0100_0000;

/// Implements a by-value binary operator trait for `Normal<$raw>`.
///
/// Arguments (the trailing comma is mandatory):
/// * `$op` / `$fn`  - operator trait and its method, e.g. `Add` / `add`.
/// * `$raw`         - wrapped float type (`f32` or `f64`).
/// * `$x86_instr`   - SSE instruction template; must leave the result in `{x}`
///                    and may use `{y}` and the scratch register `{a}`.
/// * `$arm_instr`   - AArch64 instruction template; must write `{res}` and may
///                    use `{x}`, `{y}` and the scratch register `{a}`.
///
/// The generated method saves the floating-point control register, ORs `MASK`
/// into it, runs the instruction, and restores the saved value before
/// returning, so the caller's floating-point environment is left unchanged.
macro_rules! impl_op {
    ($op:ident, $fn:ident, $raw:ty, $x86_instr:literal, $arm_instr:literal,) => {
        impl $op for Normal<$raw> {
            type Output = Self;

            fn $fn(self, rhs: Self) -> Self {
                let Self(x) = self;
                let Self(y) = rhs;
                let res: $raw;

                #[cfg(not(any(
                    target_arch = "x86",
                    target_arch = "x86_64",
                    target_arch = "aarch64"
                )))]
                compile_error!("Normal<F> arithmetic is only implemented for x86, x86_64 and aarch64");

                // SAFETY: the assembly only touches its declared operands and
                // the two local scratch slots (x86), which are passed as
                // mutable raw pointers that stay valid for the whole block.
                // The control register is restored to the saved value before
                // the block ends.
                unsafe {
                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    {
                        // `stmxcsr`/`ldmxcsr` only take memory operands, so the
                        // saved and modified MXCSR values live on the stack.
                        // They are written by the assembly, hence `mut` and
                        // `*mut` rather than shared references.
                        let mut mxcsr_save: u32 = 0;
                        let mut mxcsr_tweak: u32 = 0;
                        asm!(
                            // No register-size modifier: the default is the
                            // pointer-sized register on both x86 and x86_64.
                            "stmxcsr [{mxcsr_save}]",
                            "stmxcsr [{mxcsr_tweak}]",
                            "or DWORD PTR [{mxcsr_tweak}], {mask}",
                            "ldmxcsr [{mxcsr_tweak}]",
                            $x86_instr,
                            "ldmxcsr [{mxcsr_save}]",
                            // Keeps `{a}` referenced even when the instruction
                            // template does not need a scratch register.
                            "/* {a} */",
                            x = inout(xmm_reg) x => res,
                            y = in(xmm_reg) y,
                            a = out(xmm_reg) _,
                            mxcsr_save = in(reg) &mut mxcsr_save as *mut u32,
                            mxcsr_tweak = in(reg) &mut mxcsr_tweak as *mut u32,
                            mask = const MASK,
                        );
                    }
                    #[cfg(target_arch = "aarch64")]
                    asm!(
                        "mrs {fpcr_save:x}, fpcr",
                        "mrs {fpcr_tweak:x}, fpcr",
                        "orr {fpcr_tweak:x}, {fpcr_tweak:x}, {mask}",
                        "msr fpcr, {fpcr_tweak:x}",
                        $arm_instr,
                        "msr fpcr, {fpcr_save:x}",
                        // Keeps `{a}` referenced even when the instruction
                        // template does not need a scratch register.
                        "/* {a} */",
                        res = lateout(vreg) res,
                        x = in(vreg) x,
                        y = in(vreg) y,
                        a = out(vreg) _,
                        fpcr_save = out(reg) _,
                        fpcr_tweak = out(reg) _,
                        mask = const MASK,
                    );
                }
                Self(res)
            }
        }
    }
}

/// Implements a compound-assignment operator trait for `Normal<$raw>`.
///
/// Arguments (the trailing comma is mandatory):
/// * `$op` / `$fn`  - operator trait and its method, e.g. `AddAssign` /
///                    `add_assign`.
/// * `$raw`         - wrapped float type (`f32` or `f64`).
/// * `$x86_instr`   - SSE instruction template; must leave the result in `{x}`
///                    and may use `{y}` and the scratch register `{a}`.
/// * `$arm_instr`   - AArch64 instruction template; must leave the result in
///                    `{x}` and may use `{y}` and the scratch register `{a}`.
///
/// The generated method saves the floating-point control register, ORs `MASK`
/// into it, runs the instruction on the wrapped value in place, and restores
/// the saved control value before returning.
macro_rules! impl_op_assign {
    ($op:ident, $fn:ident, $raw:ty, $x86_instr:literal, $arm_instr:literal,) => {
        impl $op for Normal<$raw> {
            fn $fn(&mut self, rhs: Self) {
                // `self` is `&mut Self`, so `x` binds as `&mut $raw`; the
                // assembly operand below must be the pointee, `*x`.
                let Self(x) = self;
                let Self(y) = rhs;

                #[cfg(not(any(
                    target_arch = "x86",
                    target_arch = "x86_64",
                    target_arch = "aarch64"
                )))]
                compile_error!("Normal<F> arithmetic is only implemented for x86, x86_64 and aarch64");

                // SAFETY: the assembly only touches its declared operands and
                // the two local scratch slots (x86), which are passed as
                // mutable raw pointers that stay valid for the whole block.
                // The control register is restored to the saved value before
                // the block ends.
                unsafe {
                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    {
                        // `stmxcsr`/`ldmxcsr` only take memory operands, so the
                        // saved and modified MXCSR values live on the stack.
                        // They are written by the assembly, hence `mut` and
                        // `*mut` rather than shared references.
                        let mut mxcsr_save: u32 = 0;
                        let mut mxcsr_tweak: u32 = 0;
                        asm!(
                            // No register-size modifier: the default is the
                            // pointer-sized register on both x86 and x86_64.
                            "stmxcsr [{mxcsr_save}]",
                            "stmxcsr [{mxcsr_tweak}]",
                            "or DWORD PTR [{mxcsr_tweak}], {mask}",
                            "ldmxcsr [{mxcsr_tweak}]",
                            $x86_instr,
                            "ldmxcsr [{mxcsr_save}]",
                            // Keeps `{a}` referenced even when the instruction
                            // template does not need a scratch register.
                            "/* {a} */",
                            x = inout(xmm_reg) *x,
                            y = in(xmm_reg) y,
                            a = out(xmm_reg) _,
                            mxcsr_save = in(reg) &mut mxcsr_save as *mut u32,
                            mxcsr_tweak = in(reg) &mut mxcsr_tweak as *mut u32,
                            mask = const MASK,
                        );
                    }
                    #[cfg(target_arch = "aarch64")]
                    asm!(
                        "mrs {fpcr_save:x}, fpcr",
                        "mrs {fpcr_tweak:x}, fpcr",
                        "orr {fpcr_tweak:x}, {fpcr_tweak:x}, {mask}",
                        "msr fpcr, {fpcr_tweak:x}",
                        $arm_instr,
                        "msr fpcr, {fpcr_save:x}",
                        // Keeps `{a}` referenced even when the instruction
                        // template does not need a scratch register.
                        "/* {a} */",
                        x = inout(vreg) *x,
                        y = in(vreg) y,
                        a = out(vreg) _,
                        fpcr_save = out(reg) _,
                        fpcr_tweak = out(reg) _,
                        mask = const MASK,
                    );
                }
            }
        }
    }
}

// Operator implementations for `Normal<f32>` and `Normal<f64>`.
// Each invocation passes: trait, method, raw type, x86 template, aarch64 template.

// Addition.
impl_op!(Add, add, f32, "addss {x}, {y}", "fadd {res:s}, {x:s}, {y:s}",);
impl_op!(Add, add, f64, "addsd {x}, {y}", "fadd {res:d}, {x:d}, {y:d}",);
impl_op_assign!(AddAssign, add_assign, f32, "addss {x}, {y}", "fadd {x:s}, {x:s}, {y:s}",);
impl_op_assign!(AddAssign, add_assign, f64, "addsd {x}, {y}", "fadd {x:d}, {x:d}, {y:d}",);

// Subtraction.
impl_op!(Sub, sub, f32, "subss {x}, {y}", "fsub {res:s}, {x:s}, {y:s}",);
impl_op!(Sub, sub, f64, "subsd {x}, {y}", "fsub {res:d}, {x:d}, {y:d}",);
impl_op_assign!(SubAssign, sub_assign, f32, "subss {x}, {y}", "fsub {x:s}, {x:s}, {y:s}",);
impl_op_assign!(SubAssign, sub_assign, f64, "subsd {x}, {y}", "fsub {x:d}, {x:d}, {y:d}",);

// Multiplication.
impl_op!(Mul, mul, f32, "mulss {x}, {y}", "fmul {res:s}, {x:s}, {y:s}",);
impl_op!(Mul, mul, f64, "mulsd {x}, {y}", "fmul {res:d}, {x:d}, {y:d}",);
impl_op_assign!(MulAssign, mul_assign, f32, "mulss {x}, {y}", "fmul {x:s}, {x:s}, {y:s}",);
impl_op_assign!(MulAssign, mul_assign, f64, "mulsd {x}, {y}", "fmul {x:d}, {x:d}, {y:d}",);

// Division.
impl_op!(Div, div, f32, "divss {x}, {y}", "fdiv {res:s}, {x:s}, {y:s}",);
impl_op!(Div, div, f64, "divsd {x}, {y}", "fdiv {res:d}, {x:d}, {y:d}",);
impl_op_assign!(DivAssign, div_assign, f32, "divss {x}, {y}", "fdiv {x:s}, {x:s}, {y:s}",);
impl_op_assign!(DivAssign, div_assign, f64, "divsd {x}, {y}", "fdiv {x:d}, {x:d}, {y:d}",);

/*
// rem is x - (x / y).trunc() * y
impl_op!(
    Rem,
    rem,
    f32,
    "
    movss {a}, {x}
    divss {a}, {y}
    roundss {a}, {a}, 3
    mulss {a}, {y}
    subss {x}, {a}
    ",
    "
    fdiv {a:s}, {x:s}, {y:s}
    frintz {a:s}, {a:s}
    fmul {a:s}, {a:s}, {y:s}
    fsub {res:s}, {x:s}, {a:s}
    ",
);
impl_op!(
    Rem,
    rem,
    f64,
    "
    movsd {a}, {x}
    divsd {a}, {y}
    roundsd {a}, {a}, 3
    mulsd {a}, {y}
    subsd {x}, {a}
    ",
    "
    fdiv {a:d}, {x:d}, {y:d}
    frintz {a:d}, {a:d}
    fmul {a:d}, {a:d}, {y:d}
    fsub {res:d}, {x:d}, {a:d}
    ",
);
*/