use core::marker::PhantomData;
/// Mask selecting the flush-to-zero (FTZ) control flag of the x86 `MXCSR` register (bit 15).
#[cfg(target_feature = "sse")]
const SSE_FTZ_BIT: u32 = 0x8000;
/// Mask selecting the flush-to-zero (`FZ`) control flag of the AArch64 `FPCR` register (bit 24).
#[cfg(target_arch = "aarch64")]
const AARCH64_FTZ_BIT: u64 = 0x0100_0000;
/// Scope guard for the CPU's flush-to-zero (FTZ) floating point mode.
///
/// Created through `ScopedFtz::enable()`. When the guard is dropped, the FTZ bit is cleared again,
/// but only if it was this guard that set it in the first place.
#[must_use = "the flush-to-zero bit is cleared again as soon as this guard is dropped, so the \
              guard must be kept alive for as long as the mode is needed"]
pub(crate) struct ScopedFtz {
    /// `true` when FTZ was not yet enabled at the time this guard was created, which means the
    /// guard set the bit itself and has to clear it again when dropped.
    should_disable_again: bool,
    /// Raw pointers are neither `Send` nor `Sync`, so this zero-sized marker makes the guard
    /// `!Send` and `!Sync`.
    // NOTE(review): presumably this keeps the guard on the thread whose control register it
    // modified -- confirm against the callers.
    _send_sync_marker: PhantomData<*const ()>,
}
impl ScopedFtz {
    /// Turns on flush-to-zero for the current thread's floating point control register and returns
    /// a guard that undoes the change when dropped.
    ///
    /// If the FTZ bit was already set, the register is left alone and dropping the guard does
    /// nothing. The same is true when running under Miri or on a target that has neither SSE nor
    /// AArch64 support compiled in: no register is touched at all.
    pub fn enable() -> Self {
        // Exactly one of the three `should_disable_again` bindings below survives `cfg` expansion.

        // x86 with SSE: the FTZ flag lives in MXCSR.
        #[cfg(all(not(miri), target_feature = "sse"))]
        let should_disable_again = {
            let mut csr: u32 = 0;
            // SAFETY: `stmxcsr` stores the 32-bit MXCSR register to the address in the operand,
            // which points to a live, writable `u32`. SSE is statically enabled for this target.
            unsafe { std::arch::asm!("stmxcsr [{}]", in(reg) &mut csr) };

            let ftz_was_off = csr & SSE_FTZ_BIT == 0;
            if ftz_was_off {
                let csr_with_ftz = csr | SSE_FTZ_BIT;
                // SAFETY: `ldmxcsr` loads MXCSR from the address in the operand, which points to
                // a live `u32` holding the value just read with only the FTZ bit added.
                unsafe { std::arch::asm!("ldmxcsr [{}]", in(reg) &csr_with_ftz) };
            }

            ftz_was_off
        };

        // AArch64: the flag lives in FPCR.
        #[cfg(all(not(miri), target_arch = "aarch64"))]
        let should_disable_again = {
            let fpcr: u64;
            // SAFETY: Reading FPCR into a general purpose register has no other effects.
            unsafe { std::arch::asm!("mrs {}, fpcr", out(reg) fpcr) };

            let ftz_was_off = fpcr & AARCH64_FTZ_BIT == 0;
            if ftz_was_off {
                // SAFETY: Writes back the value just read with only the FTZ bit added.
                unsafe { std::arch::asm!("msr fpcr, {}", in(reg) fpcr | AARCH64_FTZ_BIT) };
            }

            ftz_was_off
        };

        // Miri cannot execute inline assembly, and other targets have no supported register, so in
        // those cases there is never anything to undo.
        #[cfg(any(
            miri,
            not(any(target_feature = "sse", target_arch = "aarch64"))
        ))]
        let should_disable_again = false;

        Self {
            should_disable_again,
            _send_sync_marker: PhantomData,
        }
    }
}
impl Drop for ScopedFtz {
    /// Clears the flush-to-zero bit again if this guard was the one that set it.
    ///
    /// The control register is read back first so that only the FTZ bit is changed and any other
    /// bits modified while the guard was alive are left as they are. Nothing happens under Miri or
    /// on targets without a supported control register.
    fn drop(&mut self) {
        // x86 with SSE: read-modify-write MXCSR.
        #[cfg(all(not(miri), target_feature = "sse"))]
        if self.should_disable_again {
            let mut csr: u32 = 0;
            // SAFETY: `stmxcsr` stores the 32-bit MXCSR register to the address in the operand,
            // which points to a live, writable `u32`. SSE is statically enabled for this target.
            unsafe { std::arch::asm!("stmxcsr [{}]", in(reg) &mut csr) };

            let csr_without_ftz = csr & !SSE_FTZ_BIT;
            // SAFETY: `ldmxcsr` loads MXCSR from the address in the operand, which points to a
            // live `u32` holding the value just read with only the FTZ bit removed.
            unsafe { std::arch::asm!("ldmxcsr [{}]", in(reg) &csr_without_ftz) };
        }

        // AArch64: read-modify-write FPCR.
        #[cfg(all(not(miri), target_arch = "aarch64"))]
        if self.should_disable_again {
            let fpcr: u64;
            // SAFETY: Reading FPCR into a general purpose register has no other effects.
            unsafe { std::arch::asm!("mrs {}, fpcr", out(reg) fpcr) };
            // SAFETY: Writes back the value just read with only the FTZ bit removed.
            unsafe { std::arch::asm!("msr fpcr, {}", in(reg) fpcr & !AARCH64_FTZ_BIT) };
        }
    }
}