use once_cell::sync::Lazy;
use std::sync::atomic::{AtomicBool, Ordering};
/// Bit index of the Flush-to-Zero (FZ) flag inside the MXCSR value.
const MXCSR_FZ_BIT: u32 = 15;
/// Bit index of the Denormals-Are-Zero (DAZ) flag inside the MXCSR value.
const MXCSR_DAZ_BIT: u32 = 6;
/// Becomes `true` once the startup check has printed the FPU warning.
static WARNING_SHOWN: AtomicBool = AtomicBool::new(false);
/// One-time check of the FPU control state, evaluated on first access.
///
/// The initializer reads the current FPU state and, when FZ or DAZ is enabled,
/// prints the warning banner and records that fact in `WARNING_SHOWN`. The
/// stored value is `true` exactly when the state was dangerous at that moment.
static FPU_CHECK_INIT: Lazy<bool> = Lazy::new(|| {
    let snapshot = get_fpu_state();
    let dangerous = snapshot.is_dangerous();
    if dangerous {
        print_fpu_warning(&snapshot);
        WARNING_SHOWN.store(true, Ordering::SeqCst);
    }
    dangerous
});
/// Snapshot of an MXCSR value together with the two decoded flags this module
/// inspects.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FpuState {
    /// Raw MXCSR value.
    pub mxcsr: u32,
    /// Whether the Flush-to-Zero (FZ) flag is set.
    pub flush_to_zero: bool,
    /// Whether the Denormals-Are-Zero (DAZ) flag is set.
    pub denormals_are_zero: bool,
}

impl FpuState {
    /// Returns `true` when at least one of FZ or DAZ is enabled.
    pub fn is_dangerous(&self) -> bool {
        !matches!(
            (self.flush_to_zero, self.denormals_are_zero),
            (false, false)
        )
    }
}

impl std::fmt::Display for FpuState {
    /// Formats the state as `MXCSR=0x<8 hex digits>, FZ=<0|1>, DAZ=<0|1>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let fz = u8::from(self.flush_to_zero);
        let daz = u8::from(self.denormals_are_zero);
        write!(f, "MXCSR=0x{:08X}, FZ={}, DAZ={}", self.mxcsr, fz, daz)
    }
}
fn print_fpu_warning(state: &FpuState) {
eprintln!();
eprintln!("================================================================================");
eprintln!("sparse-ir WARNING: Dangerous FPU settings detected!");
eprintln!("================================================================================");
eprintln!();
eprintln!(" Current FPU state: {}", state);
eprintln!();
eprintln!(" Problem: Flush-to-Zero (FZ) or Denormals-Are-Zero (DAZ) is enabled.");
eprintln!(" This causes subnormal numbers to be treated as zero, which");
eprintln!(" can produce INCORRECT results in SVD/SVE computations.");
eprintln!();
eprintln!(" Common cause: Intel Fortran compiler (ifort/ifx) with -O3 optimization");
eprintln!(" sets FZ/DAZ flags at program startup for performance.");
eprintln!();
eprintln!(" Solution: Add '-fp-model precise' flag when compiling your Fortran code:");
eprintln!();
eprintln!(" ifort -O3 -fp-model precise your_program.f90");
eprintln!(" ifx -O3 -fp-model precise your_program.f90");
eprintln!();
eprintln!(" For Quantum ESPRESSO/EPW, add to make.inc:");
eprintln!();
eprintln!(" FFLAGS += -fp-model precise");
eprintln!();
eprintln!(" Action: sparse-ir will temporarily disable FZ/DAZ during each computation");
eprintln!(" and restore the original settings afterward.");
eprintln!(" Results will be correct, but please fix the compiler flags");
eprintln!(" to avoid this warning.");
eprintln!();
eprintln!("================================================================================");
eprintln!();
}
/// Reads the current MXCSR register and decodes its FZ and DAZ flags.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub fn get_fpu_state() -> FpuState {
    let mut raw: u32 = 0;
    let slot: *mut u32 = &mut raw;
    // SAFETY: `stmxcsr` stores the 32-bit MXCSR value at the address held in
    // `slot`, which points at the live, writable local `raw`.
    unsafe {
        std::arch::asm!("stmxcsr [{dst}]", dst = in(reg) slot, options(nostack));
    }

    let bit_is_set = |bit: u32| (raw & (1 << bit)) != 0;
    FpuState {
        mxcsr: raw,
        flush_to_zero: bit_is_set(MXCSR_FZ_BIT),
        denormals_are_zero: bit_is_set(MXCSR_DAZ_BIT),
    }
}
/// Fallback for targets other than x86/x86_64: no register is read and the
/// returned state has a zero `mxcsr` with both flags clear.
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
pub fn get_fpu_state() -> FpuState {
    let (mxcsr, flush_to_zero, denormals_are_zero) = (0, false, false);
    FpuState {
        mxcsr,
        flush_to_zero,
        denormals_are_zero,
    }
}
/// Loads `value` into the MXCSR register.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
fn set_mxcsr(value: u32) {
    let src: *const u32 = &value;
    // SAFETY: `ldmxcsr` reads a 32-bit value from the address held in `src`,
    // which points at the live local `value`.
    unsafe {
        std::arch::asm!("ldmxcsr [{src}]", src = in(reg) src, options(nostack));
    }
}
/// No-op on targets other than x86/x86_64; the argument is ignored.
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
fn set_mxcsr(_value: u32) {}
/// Runs the one-time FPU check now if it has not run yet.
///
/// The first access to `FPU_CHECK_INIT` evaluates its initializer; the result
/// itself is not needed here, only the side effect of having run the check.
pub fn init_fpu_check() {
    let _check_result: bool = *FPU_CHECK_INIT;
}
/// Scope guard around a computation that must not run with FZ/DAZ enabled.
///
/// `FpuGuard::new_protect_computation` clears the flags when needed, and
/// dropping the guard writes the saved MXCSR value back. The guard must be
/// bound to a variable that lives for the whole computation: a guard that is
/// discarded immediately restores the original settings right away.
#[derive(Debug)]
#[must_use = "the original FPU settings are restored as soon as the guard is dropped"]
pub struct FpuGuard {
    /// MXCSR value captured when the guard was created.
    original_mxcsr: u32,
    /// Whether the guard changed MXCSR and has to write `original_mxcsr` back.
    needs_restore: bool,
}
impl FpuGuard {
    /// Creates a guard, clearing FZ and DAZ first if either is currently set.
    ///
    /// Accessing `FPU_CHECK_INIT` here also triggers the one-time startup check
    /// the first time any guard is created.
    pub fn new_protect_computation() -> Self {
        // Both flags this guard clears, combined into one mask.
        const FZ_DAZ_MASK: u32 = (1 << MXCSR_FZ_BIT) | (1 << MXCSR_DAZ_BIT);

        let _ = *FPU_CHECK_INIT;
        let current = get_fpu_state();
        let needs_restore = current.is_dangerous();
        if needs_restore {
            set_mxcsr(current.mxcsr & !FZ_DAZ_MASK);
        }
        Self {
            original_mxcsr: current.mxcsr,
            needs_restore,
        }
    }

    /// Returns `true` if creating this guard changed MXCSR, i.e. the original
    /// value will be written back on drop.
    pub fn was_modified(&self) -> bool {
        self.needs_restore
    }
}
impl Drop for FpuGuard {
    /// Writes the saved MXCSR value back, but only if this guard changed it.
    fn drop(&mut self) {
        if !self.needs_restore {
            return;
        }
        set_mxcsr(self.original_mxcsr);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The decoded flags must agree with the raw register value.
    #[test]
    fn test_get_fpu_state() {
        let state = get_fpu_state();
        println!("Current FPU state: {}", state);
        // FZ is MXCSR bit 15, DAZ is MXCSR bit 6.
        assert_eq!(state.flush_to_zero, (state.mxcsr & (1 << 15)) != 0);
        assert_eq!(state.denormals_are_zero, (state.mxcsr & (1 << 6)) != 0);
    }

    /// A live guard reports whether it had to intervene, and FZ/DAZ are off
    /// for as long as it exists.
    #[test]
    fn test_fpu_guard_creation() {
        let was_dangerous = get_fpu_state().is_dangerous();
        let guard = FpuGuard::new_protect_computation();
        assert_eq!(guard.was_modified(), was_dangerous);
        assert!(!get_fpu_state().is_dangerous());
        drop(guard);
    }

    /// `Display` renders the raw value as 8 hex digits and the flags as 0/1.
    #[test]
    fn test_fpu_state_display() {
        let state = FpuState {
            mxcsr: 0x1F80,
            flush_to_zero: false,
            denormals_are_zero: false,
        };
        assert_eq!(state.to_string(), "MXCSR=0x00001F80, FZ=0, DAZ=0");

        let both = FpuState {
            mxcsr: 0x9FC0,
            flush_to_zero: true,
            denormals_are_zero: true,
        };
        assert_eq!(both.to_string(), "MXCSR=0x00009FC0, FZ=1, DAZ=1");
    }

    /// Either flag alone is enough to make a state dangerous.
    #[test]
    fn test_fpu_state_dangerous() {
        let safe_state = FpuState {
            mxcsr: 0x1F80,
            flush_to_zero: false,
            denormals_are_zero: false,
        };
        assert!(!safe_state.is_dangerous());

        let dangerous_fz = FpuState {
            mxcsr: 0x9F80,
            flush_to_zero: true,
            denormals_are_zero: false,
        };
        assert!(dangerous_fz.is_dangerous());

        let dangerous_daz = FpuState {
            mxcsr: 0x1FC0,
            flush_to_zero: false,
            denormals_are_zero: true,
        };
        assert!(dangerous_daz.is_dangerous());
    }

    /// Dropping the guard leaves MXCSR exactly as it was before creation.
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[test]
    fn test_fpu_guard_restores_state() {
        let original_state = get_fpu_state();
        {
            let _guard = FpuGuard::new_protect_computation();
            assert!(!get_fpu_state().is_dangerous());
        }
        let restored_state = get_fpu_state();
        assert_eq!(original_state.mxcsr, restored_state.mxcsr);
    }
}