Skip to main content

truce_core/
denormal.rs

1//! Denormal flush guard for the audio thread.
2//!
3//! FTZ (flush-to-zero) and DAZ (denormals-are-zero) on `x86_64`,
4//! FZ (flush-to-zero) on `aarch64`. Set on entry to a plugin's
5//! `process()` and restored on drop, so the FPU control word the
6//! audio thread observes stays consistent across hosts and other
7//! plugins on the same thread.
8//!
9//! ## Why this matters
10//!
//! IIR filters with feedback can drive their state values below
//! the smallest normal float (`~1.18e-38` for f32). The CPU then
//! treats every operation on those values as a denormal-arithmetic
//! microcode trap, which on a hot core takes 50-100x longer than
//! the same op on a normal float. A reverb decaying to silence is
//! the classic case; on x86 without FTZ it can spike CPU 30x at
//! the tail. Flushing denormals to zero gives up only the denormal
//! band at the very bottom of the float range (for f32, `~1.4e-45`
//! to `~1.18e-38`: about 7 decimal orders of magnitude, i.e. 23
//! bits) - inaudible in audio, mandatory in any non-trivial DSP
//! path.
20//!
21//! ## Lifetime
22//!
23//! `DenormalGuard::new()` reads the current control word, ORs in
24//! the flush bits, writes it back, and stashes the original.
25//! `drop()` restores. The format wrappers' bridge layer
26//! (`truce_plugin`) wraps every `process()` call in a guard, so
27//! plugin authors get the right FPU state without opting in. A
28//! plugin that needs gradual underflow (extremely rare in audio)
29//! can construct an opposite guard inside `process()` to flip the
30//! bits back for the duration.
31
/// RAII guard that enables denormal-flush mode on construction and
/// restores the prior FPU control word on drop. See module docs.
///
/// The control word is per-thread CPU state and this type does not
/// pin itself to a thread, so drop the guard on the same thread that
/// created it; otherwise the saved word is written to the wrong
/// thread's register.
#[must_use = "denormal flush state reverts when this guard is dropped"]
#[derive(Debug)]
pub struct DenormalGuard {
    /// Control word captured by `new()` before the flush bits were set:
    /// the 32-bit MXCSR value zero-extended on `x86_64`, the FPCR value
    /// on `aarch64`, and `0` on every other target.
    saved: u64,
}
38
/// Flush-to-zero (FTZ) mask, MXCSR bit 15: output denormals are
/// flushed to zero.
#[cfg(target_arch = "x86_64")]
const MXCSR_FTZ: u32 = 0x8000;
/// Denormals-are-zero (DAZ) mask, MXCSR bit 6: input denormals are
/// treated as zero.
#[cfg(target_arch = "x86_64")]
const MXCSR_DAZ: u32 = 0x0040;
45
46impl DenormalGuard {
47    /// Set FTZ/DAZ (`x86_64`) or FZ (`aarch64`). On other targets this
48    /// is a no-op and the guard is a zero-sized stub.
49    ///
50    /// Implemented via inline asm rather than the `_mm_getcsr` /
51    /// `_mm_setcsr` intrinsics: those are deprecated in current
52    /// stable Rust and the `_MM_DENORMALS_ZERO_ON` constant isn't
53    /// always available alongside them. The two-instruction
54    /// `stmxcsr` / `ldmxcsr` pair is the same machine code the
55    /// intrinsics emit, just spelled differently in source.
56    #[inline]
57    pub fn new() -> Self {
58        #[cfg(target_arch = "x86_64")]
59        {
60            let mut saved: u32 = 0;
61            // SAFETY: SSE2 (which defines MXCSR) is part of x86_64's
62            // baseline target feature set; stmxcsr / ldmxcsr always
63            // available on this arch.
64            unsafe {
65                std::arch::asm!(
66                    "stmxcsr [{0}]",
67                    in(reg) &raw mut saved,
68                    options(nostack, preserves_flags),
69                );
70                let new = saved | MXCSR_FTZ | MXCSR_DAZ;
71                std::arch::asm!(
72                    "ldmxcsr [{0}]",
73                    in(reg) &raw const new,
74                    options(nostack, preserves_flags),
75                );
76            }
77            return Self {
78                saved: u64::from(saved),
79            };
80        }
81        #[cfg(target_arch = "aarch64")]
82        {
83            let saved: u64;
84            // SAFETY: FPCR is accessible from EL0 on AArch64;
85            // reading and writing it is a normal user-mode op.
86            unsafe {
87                std::arch::asm!(
88                    "mrs {0}, fpcr",
89                    out(reg) saved,
90                    options(nomem, nostack, preserves_flags),
91                );
92                let new = saved | (1u64 << 24);
93                std::arch::asm!(
94                    "msr fpcr, {0}",
95                    in(reg) new,
96                    options(nomem, nostack, preserves_flags),
97                );
98            }
99            return Self { saved };
100        }
101        #[allow(unreachable_code)]
102        Self { saved: 0 }
103    }
104}
105
106impl Default for DenormalGuard {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112impl Drop for DenormalGuard {
113    #[inline]
114    fn drop(&mut self) {
115        #[cfg(target_arch = "x86_64")]
116        {
117            // SAFETY: see `new()`.
118            #[allow(clippy::cast_possible_truncation)]
119            let restore: u32 = self.saved as u32;
120            unsafe {
121                std::arch::asm!(
122                    "ldmxcsr [{0}]",
123                    in(reg) &raw const restore,
124                    options(nostack, preserves_flags),
125                );
126            }
127        }
128        #[cfg(target_arch = "aarch64")]
129        {
130            // SAFETY: see `new()`.
131            unsafe {
132                std::arch::asm!(
133                    "msr fpcr, {0}",
134                    in(reg) self.saved,
135                    options(nomem, nostack, preserves_flags),
136                );
137            }
138        }
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Control-word bits the guard is expected to force on. Zero on
    /// targets where the guard is a no-op, which makes the bit checks
    /// below trivially true there.
    // `as` is a lossless u32 -> u64 widening; `From` is not usable in
    // a const initializer.
    #[cfg(target_arch = "x86_64")]
    const FLUSH_BITS: u64 = (MXCSR_FTZ | MXCSR_DAZ) as u64;
    #[cfg(target_arch = "aarch64")]
    const FLUSH_BITS: u64 = 1 << 24;
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    const FLUSH_BITS: u64 = 0;

    /// Reads the calling thread's floating-point control word: MXCSR
    /// on `x86_64`, FPCR on `aarch64`, and a constant `0` elsewhere.
    fn control_word() -> u64 {
        #[cfg(target_arch = "x86_64")]
        let word = {
            let mut mxcsr: u32 = 0;
            // SAFETY: `stmxcsr` is baseline on x86_64 and the operand
            // points at a live, writable `u32`.
            unsafe {
                std::arch::asm!(
                    "stmxcsr [{0}]",
                    in(reg) &raw mut mxcsr,
                    options(nostack, preserves_flags),
                );
            }
            u64::from(mxcsr)
        };

        #[cfg(target_arch = "aarch64")]
        let word = {
            let fpcr: u64;
            // SAFETY: FPCR is readable from EL0 on AArch64.
            unsafe {
                std::arch::asm!(
                    "mrs {0}, fpcr",
                    out(reg) fpcr,
                    options(nomem, nostack, preserves_flags),
                );
            }
            fpcr
        };

        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let word = 0_u64;

        word
    }

    #[test]
    fn guard_construct_drop_doesnt_panic() {
        // Pure smoke test. The cycles-stalled bench in
        // `truce-simd/benches` is the real-world performance check.
        let _guard = DenormalGuard::new();
    }

    #[test]
    fn guard_sets_flush_bits_and_restores_prior_word() {
        let before = control_word();
        {
            let _guard = DenormalGuard::new();
            let inside = control_word();
            assert_eq!(inside & FLUSH_BITS, FLUSH_BITS);
            // Nothing except the flush bits may change on entry.
            assert_eq!(inside & !FLUSH_BITS, before & !FLUSH_BITS);
        }
        assert_eq!(control_word(), before);
    }

    #[test]
    fn nested_guards_restore_in_lifo_order() {
        // Dropping the inner guard must leave the outer guard's
        // flushing state in force; dropping the outer guard must bring
        // back the word that was there before either existed.
        let before = control_word();
        let outer = DenormalGuard::new();
        let with_outer = control_word();
        assert_eq!(with_outer & FLUSH_BITS, FLUSH_BITS);
        {
            let _inner = DenormalGuard::new();
            assert_eq!(control_word() & FLUSH_BITS, FLUSH_BITS);
        }
        assert_eq!(control_word(), with_outer);
        drop(outer);
        assert_eq!(control_word(), before);
    }
}