Skip to main content

photon_ring/
wait.rs

1// Copyright 2026 Photon Ring Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Wait strategies for blocking receive operations.
5//!
6//! [`WaitStrategy`] controls how a consumer thread waits when no message is
7//! available. All strategies are `no_std` compatible.
8//!
9//! | Strategy | Latency | CPU usage | Best for |
10//! |---|---|---|---|
11//! | `BusySpin` | Lowest (~0 ns wakeup) | 100% core | Dedicated, pinned cores |
12//! | `YieldSpin` | Low (~30 ns on x86) | High | Shared cores, SMT |
13//! | `BackoffSpin` | Medium (exponential) | Decreasing | Background consumers |
14//! | `Adaptive` | Auto-scaling | Varies | General purpose |
15//!
16//! # Platform-specific optimizations
17//!
//! On **aarch64**, `YieldSpin` and `BackoffSpin` use the `WFE` (Wait For
//! Event) instruction instead of `core::hint::spin_loop()` (which maps to
//! `YIELD`). `WFE` can put the core into a low-power state until an event
//! wakes it. Note that a store from the publisher only generates such an
//! event when the waiter's exclusive monitor is armed by a prior
//! load-exclusive; absent that, wakeup relies on `SEV`, interrupts, or
//! implementation-defined events. The `SEVL` + `WFE` pattern is used:
//! `SEVL` sets the local event register so the first `WFE` doesn't block
//! unconditionally.
24//!
25//! On **x86/x86_64**, `core::hint::spin_loop()` emits `PAUSE`, which is the
26//! standard spin-wait hint (~140 cycles on Skylake+).
27//!
28// NOTE: Intel Tremont+ CPUs support UMWAIT/TPAUSE instructions for
29// user-mode cache line monitoring. These would allow near-zero latency
30// wakeup without burning CPU. Not yet implemented — requires CPUID
31// feature detection (WAITPKG) and is only available on recent Intel.
32
33/// Strategy for blocking `recv()` and `SubscriberGroup::recv()`.
34///
35/// All variants are `no_std` compatible — no OS thread primitives required.
36///
37/// | Strategy | Latency | CPU usage | Best for |
38/// |---|---|---|---|
39/// | `BusySpin` | Lowest (~0 ns wakeup) | 100% core | Dedicated, pinned cores |
40/// | `YieldSpin` | Low (~30 ns on x86) | High | Shared cores, SMT |
41/// | `BackoffSpin` | Medium (exponential) | Decreasing | Background consumers |
42/// | `Adaptive` | Auto-scaling | Varies | General purpose |
/// Strategy for blocking `recv()` and `SubscriberGroup::recv()`.
///
/// All variants are `no_std` compatible — no OS thread primitives required.
///
/// The default (via `Default`) is `Adaptive { spin_iters: 64, yield_iters: 64 }`.
///
/// | Strategy | Latency | CPU usage | Best for |
/// |---|---|---|---|
/// | `BusySpin` | Lowest (~0 ns wakeup) | 100% core | Dedicated, pinned cores |
/// | `YieldSpin` | Low (~30 ns on x86) | High | Shared cores, SMT |
/// | `BackoffSpin` | Medium (exponential) | Decreasing | Background consumers |
/// | `Adaptive` | Auto-scaling | Varies | General purpose |
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WaitStrategy {
    /// Pure busy-spin with no PAUSE instruction. Minimum wakeup latency
    /// but consumes 100% of one CPU core. Use on dedicated, pinned cores.
    BusySpin,

    /// Spin with `core::hint::spin_loop()` (PAUSE on x86, YIELD on ARM)
    /// between iterations. Yields the CPU pipeline to the SMT sibling
    /// and reduces power consumption vs `BusySpin`.
    /// On aarch64 the implementation emits `SEVL` + `WFE` instead of the
    /// generic spin hint.
    YieldSpin,

    /// Exponential backoff spin. Starts with bare spins, then escalates
    /// to PAUSE-based spins with increasing delays. Good for consumers
    /// that may be idle for extended periods without burning a full core.
    BackoffSpin,

    /// Three-phase escalation: bare spin for `spin_iters` iterations,
    /// then PAUSE-spin for `yield_iters`, then repeated PAUSE bursts.
    Adaptive {
        /// Number of bare-spin iterations before escalating to PAUSE.
        spin_iters: u32,
        /// Number of PAUSE iterations before entering deep backoff.
        yield_iters: u32,
    },
}
68
69impl Default for WaitStrategy {
70    fn default() -> Self {
71        WaitStrategy::Adaptive {
72            spin_iters: 64,
73            yield_iters: 64,
74        }
75    }
76}
77
78impl WaitStrategy {
79    /// Execute one wait iteration. Called by `recv_with` on each loop when
80    /// `try_recv` returns `Empty`.
81    ///
82    /// `iter` is the zero-based iteration count since the last successful
83    /// receive — it drives phase transitions in `Adaptive` and `BackoffSpin`.
84    #[inline]
85    pub(crate) fn wait(&self, iter: u32) {
86        match self {
87            WaitStrategy::BusySpin => {
88                // No hint — pure busy loop. Fastest wakeup, highest power.
89            }
90            WaitStrategy::YieldSpin => {
91                // On aarch64: SEVL + WFE puts the core into a low-power
92                // state until a cache-line event wakes it. SEVL sets the
93                // local event register so the first WFE returns immediately
94                // (avoids unconditional blocking).
95                // On x86: PAUSE yields the pipeline to the SMT sibling.
96                #[cfg(target_arch = "aarch64")]
97                unsafe {
98                    core::arch::asm!("sevl", options(nomem, nostack));
99                    core::arch::asm!("wfe", options(nomem, nostack));
100                }
101                #[cfg(not(target_arch = "aarch64"))]
102                core::hint::spin_loop();
103            }
104            WaitStrategy::BackoffSpin => {
105                // Exponential backoff: more iterations as we wait longer.
106                // On aarch64: WFE sleeps until a cache-line event, making
107                // each iteration near-zero power. On x86: PAUSE yields the
108                // pipeline with ~140 cycle delay per iteration.
109                let pauses = 1u32.wrapping_shl(iter.min(6)); // 1, 2, 4, 8, 16, 32, 64
110                for _ in 0..pauses {
111                    #[cfg(target_arch = "aarch64")]
112                    unsafe {
113                        core::arch::asm!("wfe", options(nomem, nostack));
114                    }
115                    #[cfg(not(target_arch = "aarch64"))]
116                    core::hint::spin_loop();
117                }
118            }
119            WaitStrategy::Adaptive {
120                spin_iters,
121                yield_iters,
122            } => {
123                if iter < *spin_iters {
124                    // Phase 1: bare spin — fastest wakeup.
125                } else if iter < spin_iters + yield_iters {
126                    // Phase 2: PAUSE-spin — yields pipeline.
127                    core::hint::spin_loop();
128                } else {
129                    // Phase 3: deep backoff — multiple PAUSE per iteration.
130                    for _ in 0..8 {
131                        core::hint::spin_loop();
132                    }
133                }
134            }
135        }
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_is_adaptive() {
        let expected = WaitStrategy::Adaptive {
            spin_iters: 64,
            yield_iters: 64,
        };
        assert_eq!(WaitStrategy::default(), expected);
    }

    #[test]
    fn busy_spin_returns_immediately() {
        let strategy = WaitStrategy::BusySpin;
        for iteration in 0..1000 {
            strategy.wait(iteration);
        }
    }

    #[test]
    fn yield_spin_returns() {
        let strategy = WaitStrategy::YieldSpin;
        for iteration in 0..100 {
            strategy.wait(iteration);
        }
    }

    #[test]
    fn backoff_spin_returns() {
        let strategy = WaitStrategy::BackoffSpin;
        for iteration in 0..20 {
            strategy.wait(iteration);
        }
    }

    #[test]
    fn adaptive_phases() {
        // 20 iterations with thresholds of 4/4 exercises all three phases:
        // bare spin, PAUSE-spin, and deep backoff.
        let strategy = WaitStrategy::Adaptive {
            spin_iters: 4,
            yield_iters: 4,
        };
        for iteration in 0..20 {
            strategy.wait(iteration);
        }
    }

    #[test]
    fn clone_and_copy() {
        let original = WaitStrategy::BusySpin;
        let copied = original;
        #[allow(clippy::clone_on_copy)]
        let cloned = original.clone();
        assert_eq!(original, copied);
        assert_eq!(original, cloned);
    }

    #[test]
    fn debug_format() {
        use alloc::format;
        let rendered = format!("{:?}", WaitStrategy::BusySpin);
        assert!(rendered.contains("BusySpin"));
    }
}
207}