sparse_ir/
fpu_check.rs

1//! FPU state checking and correction for numerical stability
2//!
3//! This module detects dangerous FPU settings (particularly Flush-to-Zero and
4//! Denormals-Are-Zero flags) that can cause incorrect SVD results when called
5//! from Intel Fortran programs compiled with `-O3` without `-fp-model precise`.
6//!
7//! # Background
8//!
9//! Intel Fortran's `-O3` optimization may set the MXCSR register's FZ and DAZ bits
10//! at program startup for performance. However, this causes problems for SVD
11//! computations that rely on proper handling of denormalized (subnormal) numbers.
12//!
13//! # Usage
14//!
15//! The [`FpuGuard`] RAII guard automatically saves, corrects, and restores FPU state:
16//!
17//! ```ignore
18//! let _guard = FpuGuard::new_protect_computation();
19//! // Computation here - FZ/DAZ are disabled
20//! // FPU state is automatically restored when _guard is dropped
21//! ```
22//!
23//! # Performance
24//!
25//! The `stmxcsr`/`ldmxcsr` instructions are very lightweight (a few CPU cycles),
26//! so the overhead of checking and restoring FPU state is negligible compared
27//! to actual matrix computations.
28
29use once_cell::sync::Lazy;
30use std::sync::atomic::{AtomicBool, Ordering};
31
32/// MXCSR bit positions
33const MXCSR_FZ_BIT: u32 = 15; // Flush to Zero
34const MXCSR_DAZ_BIT: u32 = 6; // Denormals Are Zero
35
36/// Flag to track if warning has been shown (only show once per process)
37static WARNING_SHOWN: AtomicBool = AtomicBool::new(false);
38
39/// Lazy initialization to check FPU state at library load time
40static FPU_CHECK_INIT: Lazy<bool> = Lazy::new(|| {
41    let state = get_fpu_state();
42    if state.is_dangerous() {
43        print_fpu_warning(&state);
44        WARNING_SHOWN.store(true, Ordering::SeqCst);
45        true // dangerous state detected
46    } else {
47        false
48    }
49});
50
51/// Result of FPU state check
52#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub struct FpuState {
54    /// Raw MXCSR register value
55    pub mxcsr: u32,
56    /// Flush to Zero flag
57    pub flush_to_zero: bool,
58    /// Denormals Are Zero flag
59    pub denormals_are_zero: bool,
60}
61
62impl FpuState {
63    /// Check if FPU state is dangerous for numerical computation
64    pub fn is_dangerous(&self) -> bool {
65        self.flush_to_zero || self.denormals_are_zero
66    }
67}
68
69impl std::fmt::Display for FpuState {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        write!(
72            f,
73            "MXCSR=0x{:08X}, FZ={}, DAZ={}",
74            self.mxcsr, self.flush_to_zero as u8, self.denormals_are_zero as u8
75        )
76    }
77}
78
79/// Print FPU warning message (called only once)
80fn print_fpu_warning(state: &FpuState) {
81    eprintln!();
82    eprintln!("================================================================================");
83    eprintln!("sparse-ir WARNING: Dangerous FPU settings detected!");
84    eprintln!("================================================================================");
85    eprintln!();
86    eprintln!("  Current FPU state: {}", state);
87    eprintln!();
88    eprintln!("  Problem: Flush-to-Zero (FZ) or Denormals-Are-Zero (DAZ) is enabled.");
89    eprintln!("           This causes subnormal numbers to be treated as zero, which");
90    eprintln!("           can produce INCORRECT results in SVD/SVE computations.");
91    eprintln!();
92    eprintln!("  Common cause: Intel Fortran compiler (ifort/ifx) with -O3 optimization");
93    eprintln!("                sets FZ/DAZ flags at program startup for performance.");
94    eprintln!();
95    eprintln!("  Solution: Add '-fp-model precise' flag when compiling your Fortran code:");
96    eprintln!();
97    eprintln!("      ifort -O3 -fp-model precise your_program.f90");
98    eprintln!("      ifx   -O3 -fp-model precise your_program.f90");
99    eprintln!();
100    eprintln!("  For Quantum ESPRESSO/EPW, add to make.inc:");
101    eprintln!();
102    eprintln!("      FFLAGS += -fp-model precise");
103    eprintln!();
104    eprintln!("  Action: sparse-ir will temporarily disable FZ/DAZ during each computation");
105    eprintln!("          and restore the original settings afterward.");
106    eprintln!("          Results will be correct, but please fix the compiler flags");
107    eprintln!("          to avoid this warning.");
108    eprintln!();
109    eprintln!("================================================================================");
110    eprintln!();
111}
112
113/// Get current FPU state (x86/x86_64 only)
114#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
115pub fn get_fpu_state() -> FpuState {
116    let mut mxcsr: u32 = 0;
117    unsafe {
118        std::arch::asm!(
119            "stmxcsr [{}]",
120            in(reg) &mut mxcsr,
121            options(nostack)
122        );
123    }
124
125    FpuState {
126        mxcsr,
127        flush_to_zero: (mxcsr >> MXCSR_FZ_BIT) & 1 != 0,
128        denormals_are_zero: (mxcsr >> MXCSR_DAZ_BIT) & 1 != 0,
129    }
130}
131
132/// Get current FPU state (non-x86 fallback)
133#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
134pub fn get_fpu_state() -> FpuState {
135    // On non-x86 platforms, assume safe defaults
136    FpuState {
137        mxcsr: 0,
138        flush_to_zero: false,
139        denormals_are_zero: false,
140    }
141}
142
143/// Set MXCSR register value (x86/x86_64 only)
144#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
145fn set_mxcsr(value: u32) {
146    unsafe {
147        std::arch::asm!(
148            "ldmxcsr [{}]",
149            in(reg) &value,
150            options(nostack)
151        );
152    }
153}
154
155/// Set MXCSR register value (non-x86 fallback - no-op)
156#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
157fn set_mxcsr(_value: u32) {
158    // No-op on non-x86 platforms
159}
160
161/// Initialize FPU check (call this early to trigger warning if needed)
162///
163/// This function triggers the lazy initialization which checks FPU state
164/// and prints a warning if dangerous settings are detected.
165/// The warning is only printed once per process.
166pub fn init_fpu_check() {
167    let _ = *FPU_CHECK_INIT;
168}
169
170/// RAII guard that protects a computation from dangerous FPU settings
171///
172/// On creation:
173/// 1. Triggers one-time FPU check and warning (if not already done)
174/// 2. If FZ or DAZ is enabled, disables them temporarily
175///
176/// On drop:
177/// - Restores the original FPU state
178///
179/// # Example
180///
181/// ```ignore
182/// {
183///     let _guard = FpuGuard::new_protect_computation();
184///     // Computation here - FZ/DAZ are disabled
185///     perform_svd_computation();
186/// } // Original FPU state is restored here
187/// ```
188///
189/// # Performance
190///
191/// The overhead is negligible (a few CPU cycles for stmxcsr/ldmxcsr).
192pub struct FpuGuard {
193    original_mxcsr: u32,
194    needs_restore: bool,
195}
196
197impl FpuGuard {
198    /// Create a new guard that protects computation from FZ/DAZ
199    ///
200    /// - Triggers one-time warning if dangerous FPU settings are detected
201    /// - Temporarily disables FZ/DAZ if they are enabled
202    /// - Restores original state when dropped
203    pub fn new_protect_computation() -> Self {
204        // Trigger one-time FPU check and warning
205        let _ = *FPU_CHECK_INIT;
206
207        let state = get_fpu_state();
208        let original_mxcsr = state.mxcsr;
209
210        if state.is_dangerous() {
211            // Clear FZ and DAZ bits
212            let safe_mxcsr = original_mxcsr & !((1 << MXCSR_FZ_BIT) | (1 << MXCSR_DAZ_BIT));
213            set_mxcsr(safe_mxcsr);
214
215            Self {
216                original_mxcsr,
217                needs_restore: true,
218            }
219        } else {
220            Self {
221                original_mxcsr,
222                needs_restore: false,
223            }
224        }
225    }
226
227    /// Check if the guard needed to modify FPU state
228    pub fn was_modified(&self) -> bool {
229        self.needs_restore
230    }
231}
232
233impl Drop for FpuGuard {
234    fn drop(&mut self) {
235        if self.needs_restore {
236            // Restore original FPU state
237            set_mxcsr(self.original_mxcsr);
238        }
239    }
240}
241
242#[cfg(test)]
243mod tests {
244    use super::*;
245
246    #[test]
247    fn test_get_fpu_state() {
248        let state = get_fpu_state();
249        // Just verify we can read the state without panicking
250        println!("Current FPU state: {}", state);
251    }
252
253    #[test]
254    fn test_fpu_guard_creation() {
255        let guard = FpuGuard::new_protect_computation();
256        // Guard should be created successfully
257        drop(guard);
258    }
259
260    #[test]
261    fn test_fpu_state_display() {
262        let state = FpuState {
263            mxcsr: 0x1F80,
264            flush_to_zero: false,
265            denormals_are_zero: false,
266        };
267        let display = format!("{}", state);
268        assert!(display.contains("MXCSR=0x00001F80"));
269        assert!(display.contains("FZ=0"));
270        assert!(display.contains("DAZ=0"));
271    }
272
273    #[test]
274    fn test_fpu_state_dangerous() {
275        let safe_state = FpuState {
276            mxcsr: 0x1F80,
277            flush_to_zero: false,
278            denormals_are_zero: false,
279        };
280        assert!(!safe_state.is_dangerous());
281
282        let dangerous_fz = FpuState {
283            mxcsr: 0x9F80,
284            flush_to_zero: true,
285            denormals_are_zero: false,
286        };
287        assert!(dangerous_fz.is_dangerous());
288
289        let dangerous_daz = FpuState {
290            mxcsr: 0x1FC0,
291            flush_to_zero: false,
292            denormals_are_zero: true,
293        };
294        assert!(dangerous_daz.is_dangerous());
295    }
296
297    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
298    #[test]
299    fn test_fpu_guard_restores_state() {
300        let original_state = get_fpu_state();
301
302        {
303            let _guard = FpuGuard::new_protect_computation();
304            // State might be modified here
305        }
306
307        let restored_state = get_fpu_state();
308        assert_eq!(original_state.mxcsr, restored_state.mxcsr);
309    }
310}