truce_core/denormal.rs
1//! Denormal flush guard for the audio thread.
2//!
3//! FTZ (flush-to-zero) and DAZ (denormals-are-zero) on `x86_64`,
4//! FZ (flush-to-zero) on `aarch64`. Set on entry to a plugin's
5//! `process()` and restored on drop, so the FPU control word the
6//! audio thread observes stays consistent across hosts and other
7//! plugins on the same thread.
8//!
9//! ## Why this matters
10//!
11//! IIR filters with feedback can drive their state values below
12//! the smallest normal float (`~1.18e-38` for f32). The CPU then
13//! treats every operation on those values as a denormal-arithmetic
14//! microcode trap, which on a hot core takes 50-100x longer than
15//! the same op on a normal float. A reverb decaying to silence is
16//! the classic case; on x86 without FTZ it can spike CPU 30x at
//! the tail. Flushing denormals to zero gives up only the
//! subnormal band at the very bottom of the float range (for f32,
//! roughly seven decimal orders of magnitude below `~1.18e-38`) -
//! inaudible in audio, mandatory in any non-trivial DSP path.
20//!
21//! ## Lifetime
22//!
23//! `DenormalGuard::new()` reads the current control word, ORs in
24//! the flush bits, writes it back, and stashes the original.
25//! `drop()` restores. The format wrappers' bridge layer
26//! (`truce_plugin`) wraps every `process()` call in a guard, so
27//! plugin authors get the right FPU state without opting in. A
28//! plugin that needs gradual underflow (extremely rare in audio)
29//! can construct an opposite guard inside `process()` to flip the
30//! bits back for the duration.
31
/// RAII guard that enables denormal-flush mode on construction and
/// restores the prior FPU control word on drop. See module docs.
///
/// The guard carries the control word that was in effect when it was
/// created. The field exists on every target, including those where
/// construction and drop do not touch any register.
#[derive(Debug)]
#[must_use = "denormal flush state reverts when this guard is dropped"]
pub struct DenormalGuard {
    /// Control word captured at construction and written back on drop.
    /// Stored as `u64` so one field can hold either the 32-bit MXCSR
    /// (`x86_64`) or the 64-bit FPCR (`aarch64`).
    saved: u64,
}
38
/// Flush-to-zero mask for MXCSR (bit 15): denormal results are
/// replaced by zero.
#[cfg(target_arch = "x86_64")]
const MXCSR_FTZ: u32 = 0x8000;
/// Denormals-are-zero mask for MXCSR (bit 6): denormal inputs are
/// read as zero.
#[cfg(target_arch = "x86_64")]
const MXCSR_DAZ: u32 = 0x0040;
45
46impl DenormalGuard {
47 /// Set FTZ/DAZ (`x86_64`) or FZ (`aarch64`). On other targets this
48 /// is a no-op and the guard is a zero-sized stub.
49 ///
50 /// Implemented via inline asm rather than the `_mm_getcsr` /
51 /// `_mm_setcsr` intrinsics: those are deprecated in current
52 /// stable Rust and the `_MM_DENORMALS_ZERO_ON` constant isn't
53 /// always available alongside them. The two-instruction
54 /// `stmxcsr` / `ldmxcsr` pair is the same machine code the
55 /// intrinsics emit, just spelled differently in source.
56 #[inline]
57 pub fn new() -> Self {
58 #[cfg(target_arch = "x86_64")]
59 {
60 let mut saved: u32 = 0;
61 // SAFETY: SSE2 (which defines MXCSR) is part of x86_64's
62 // baseline target feature set; stmxcsr / ldmxcsr always
63 // available on this arch.
64 unsafe {
65 std::arch::asm!(
66 "stmxcsr [{0}]",
67 in(reg) &raw mut saved,
68 options(nostack, preserves_flags),
69 );
70 let new = saved | MXCSR_FTZ | MXCSR_DAZ;
71 std::arch::asm!(
72 "ldmxcsr [{0}]",
73 in(reg) &raw const new,
74 options(nostack, preserves_flags),
75 );
76 }
77 return Self {
78 saved: u64::from(saved),
79 };
80 }
81 #[cfg(target_arch = "aarch64")]
82 {
83 let saved: u64;
84 // SAFETY: FPCR is accessible from EL0 on AArch64;
85 // reading and writing it is a normal user-mode op.
86 unsafe {
87 std::arch::asm!(
88 "mrs {0}, fpcr",
89 out(reg) saved,
90 options(nomem, nostack, preserves_flags),
91 );
92 let new = saved | (1u64 << 24);
93 std::arch::asm!(
94 "msr fpcr, {0}",
95 in(reg) new,
96 options(nomem, nostack, preserves_flags),
97 );
98 }
99 return Self { saved };
100 }
101 #[allow(unreachable_code)]
102 Self { saved: 0 }
103 }
104}
105
106impl Default for DenormalGuard {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
112impl Drop for DenormalGuard {
113 #[inline]
114 fn drop(&mut self) {
115 #[cfg(target_arch = "x86_64")]
116 {
117 // SAFETY: see `new()`.
118 #[allow(clippy::cast_possible_truncation)]
119 let restore: u32 = self.saved as u32;
120 unsafe {
121 std::arch::asm!(
122 "ldmxcsr [{0}]",
123 in(reg) &raw const restore,
124 options(nostack, preserves_flags),
125 );
126 }
127 }
128 #[cfg(target_arch = "aarch64")]
129 {
130 // SAFETY: see `new()`.
131 unsafe {
132 std::arch::asm!(
133 "msr fpcr, {0}",
134 in(reg) self.saved,
135 options(nomem, nostack, preserves_flags),
136 );
137 }
138 }
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Bits the guard is expected to force on for the current target.
    /// Spelled out numerically, not taken from the production
    /// constants, so a wrong constant there is caught here. Zero on
    /// targets where the guard touches no register.
    #[cfg(target_arch = "x86_64")]
    const FLUSH_BITS: u64 = (1 << 15) | (1 << 6);
    #[cfg(target_arch = "aarch64")]
    const FLUSH_BITS: u64 = 1 << 24;
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    const FLUSH_BITS: u64 = 0;

    /// Read the control bits of MXCSR. The six exception-status flags
    /// (bits 0-5) are masked off because they are status, not control.
    #[cfg(target_arch = "x86_64")]
    fn control_word() -> u64 {
        let mut mxcsr: u32 = 0;
        // SAFETY: stmxcsr is baseline on x86_64 and stores four bytes
        // through a pointer to a live local.
        unsafe {
            std::arch::asm!(
                "stmxcsr [{0}]",
                in(reg) &raw mut mxcsr,
                options(nostack, preserves_flags),
            );
        }
        u64::from(mxcsr & !0x3F)
    }

    /// Read FPCR.
    #[cfg(target_arch = "aarch64")]
    fn control_word() -> u64 {
        let fpcr: u64;
        // SAFETY: FPCR is readable from EL0; the read touches no memory.
        unsafe {
            std::arch::asm!(
                "mrs {0}, fpcr",
                out(reg) fpcr,
                options(nomem, nostack, preserves_flags),
            );
        }
        fpcr
    }

    /// No control word is managed on other targets.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    fn control_word() -> u64 {
        0
    }

    #[test]
    fn guard_construct_drop_doesnt_panic() {
        // Smoke test: construction and drop must be safe to run on
        // any target. The cycles-stalled bench in
        // `truce-simd/benches` is the real-world performance check.
        let _guard = DenormalGuard::new();
    }

    #[test]
    fn guard_sets_flush_bits_and_restores_on_drop() {
        let before = control_word();
        {
            let _guard = DenormalGuard::new();
            let during = control_word();
            assert_eq!(during & FLUSH_BITS, FLUSH_BITS, "flush bits not set");
            // Nothing except the flush bits may change.
            assert_eq!(during & !FLUSH_BITS, before & !FLUSH_BITS);
        }
        assert_eq!(control_word(), before, "control word not restored");
    }

    #[test]
    fn nested_guards_restore_in_lifo_order() {
        // Each guard restores what it saw at construction, so the
        // inner drop must leave the outer guard's flush bits in place
        // and the outer drop must return to the starting state.
        let before = control_word();
        let outer = DenormalGuard::new();
        let during_outer = control_word();
        assert_eq!(during_outer & FLUSH_BITS, FLUSH_BITS);
        {
            let _inner = DenormalGuard::new();
            assert_eq!(control_word(), during_outer);
        }
        assert_eq!(
            control_word(),
            during_outer,
            "dropping the inner guard cleared the outer guard's state"
        );
        drop(outer);
        assert_eq!(control_word(), before);
    }
}