1use once_cell::sync::Lazy;
30use std::sync::atomic::{AtomicBool, Ordering};
31
32const MXCSR_FZ_BIT: u32 = 15; const MXCSR_DAZ_BIT: u32 = 6; static WARNING_SHOWN: AtomicBool = AtomicBool::new(false);
38
39static FPU_CHECK_INIT: Lazy<bool> = Lazy::new(|| {
41 let state = get_fpu_state();
42 if state.is_dangerous() {
43 print_fpu_warning(&state);
44 WARNING_SHOWN.store(true, Ordering::SeqCst);
45 true } else {
47 false
48 }
49});
50
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub struct FpuState {
54 pub mxcsr: u32,
56 pub flush_to_zero: bool,
58 pub denormals_are_zero: bool,
60}
61
62impl FpuState {
63 pub fn is_dangerous(&self) -> bool {
65 self.flush_to_zero || self.denormals_are_zero
66 }
67}
68
69impl std::fmt::Display for FpuState {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 write!(
72 f,
73 "MXCSR=0x{:08X}, FZ={}, DAZ={}",
74 self.mxcsr, self.flush_to_zero as u8, self.denormals_are_zero as u8
75 )
76 }
77}
78
79fn print_fpu_warning(state: &FpuState) {
81 eprintln!();
82 eprintln!("================================================================================");
83 eprintln!("sparse-ir WARNING: Dangerous FPU settings detected!");
84 eprintln!("================================================================================");
85 eprintln!();
86 eprintln!(" Current FPU state: {}", state);
87 eprintln!();
88 eprintln!(" Problem: Flush-to-Zero (FZ) or Denormals-Are-Zero (DAZ) is enabled.");
89 eprintln!(" This causes subnormal numbers to be treated as zero, which");
90 eprintln!(" can produce INCORRECT results in SVD/SVE computations.");
91 eprintln!();
92 eprintln!(" Common cause: Intel Fortran compiler (ifort/ifx) with -O3 optimization");
93 eprintln!(" sets FZ/DAZ flags at program startup for performance.");
94 eprintln!();
95 eprintln!(" Solution: Add '-fp-model precise' flag when compiling your Fortran code:");
96 eprintln!();
97 eprintln!(" ifort -O3 -fp-model precise your_program.f90");
98 eprintln!(" ifx -O3 -fp-model precise your_program.f90");
99 eprintln!();
100 eprintln!(" For Quantum ESPRESSO/EPW, add to make.inc:");
101 eprintln!();
102 eprintln!(" FFLAGS += -fp-model precise");
103 eprintln!();
104 eprintln!(" Action: sparse-ir will temporarily disable FZ/DAZ during each computation");
105 eprintln!(" and restore the original settings afterward.");
106 eprintln!(" Results will be correct, but please fix the compiler flags");
107 eprintln!(" to avoid this warning.");
108 eprintln!();
109 eprintln!("================================================================================");
110 eprintln!();
111}
112
113#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
115pub fn get_fpu_state() -> FpuState {
116 let mut mxcsr: u32 = 0;
117 unsafe {
118 std::arch::asm!(
119 "stmxcsr [{}]",
120 in(reg) &mut mxcsr,
121 options(nostack)
122 );
123 }
124
125 FpuState {
126 mxcsr,
127 flush_to_zero: (mxcsr >> MXCSR_FZ_BIT) & 1 != 0,
128 denormals_are_zero: (mxcsr >> MXCSR_DAZ_BIT) & 1 != 0,
129 }
130}
131
132#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
134pub fn get_fpu_state() -> FpuState {
135 FpuState {
137 mxcsr: 0,
138 flush_to_zero: false,
139 denormals_are_zero: false,
140 }
141}
142
143#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
145fn set_mxcsr(value: u32) {
146 unsafe {
147 std::arch::asm!(
148 "ldmxcsr [{}]",
149 in(reg) &value,
150 options(nostack)
151 );
152 }
153}
154
155#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
157fn set_mxcsr(_value: u32) {
158 }
160
161pub fn init_fpu_check() {
167 let _ = *FPU_CHECK_INIT;
168}
169
170pub struct FpuGuard {
193 original_mxcsr: u32,
194 needs_restore: bool,
195}
196
197impl FpuGuard {
198 pub fn new_protect_computation() -> Self {
204 let _ = *FPU_CHECK_INIT;
206
207 let state = get_fpu_state();
208 let original_mxcsr = state.mxcsr;
209
210 if state.is_dangerous() {
211 let safe_mxcsr = original_mxcsr & !((1 << MXCSR_FZ_BIT) | (1 << MXCSR_DAZ_BIT));
213 set_mxcsr(safe_mxcsr);
214
215 Self {
216 original_mxcsr,
217 needs_restore: true,
218 }
219 } else {
220 Self {
221 original_mxcsr,
222 needs_restore: false,
223 }
224 }
225 }
226
227 pub fn was_modified(&self) -> bool {
229 self.needs_restore
230 }
231}
232
233impl Drop for FpuGuard {
234 fn drop(&mut self) {
235 if self.needs_restore {
236 set_mxcsr(self.original_mxcsr);
238 }
239 }
240}
241
242#[cfg(test)]
243mod tests {
244 use super::*;
245
246 #[test]
247 fn test_get_fpu_state() {
248 let state = get_fpu_state();
249 println!("Current FPU state: {}", state);
251 }
252
253 #[test]
254 fn test_fpu_guard_creation() {
255 let guard = FpuGuard::new_protect_computation();
256 drop(guard);
258 }
259
260 #[test]
261 fn test_fpu_state_display() {
262 let state = FpuState {
263 mxcsr: 0x1F80,
264 flush_to_zero: false,
265 denormals_are_zero: false,
266 };
267 let display = format!("{}", state);
268 assert!(display.contains("MXCSR=0x00001F80"));
269 assert!(display.contains("FZ=0"));
270 assert!(display.contains("DAZ=0"));
271 }
272
273 #[test]
274 fn test_fpu_state_dangerous() {
275 let safe_state = FpuState {
276 mxcsr: 0x1F80,
277 flush_to_zero: false,
278 denormals_are_zero: false,
279 };
280 assert!(!safe_state.is_dangerous());
281
282 let dangerous_fz = FpuState {
283 mxcsr: 0x9F80,
284 flush_to_zero: true,
285 denormals_are_zero: false,
286 };
287 assert!(dangerous_fz.is_dangerous());
288
289 let dangerous_daz = FpuState {
290 mxcsr: 0x1FC0,
291 flush_to_zero: false,
292 denormals_are_zero: true,
293 };
294 assert!(dangerous_daz.is_dangerous());
295 }
296
297 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
298 #[test]
299 fn test_fpu_guard_restores_state() {
300 let original_state = get_fpu_state();
301
302 {
303 let _guard = FpuGuard::new_protect_computation();
304 }
306
307 let restored_state = get_fpu_state();
308 assert_eq!(original_state.mxcsr, restored_state.mxcsr);
309 }
310}