// photon_ring/wait.rs
1// Copyright 2026 Photon Ring Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Wait strategies for blocking receive operations.
5//!
6//! [`WaitStrategy`] controls how a consumer thread waits when no message is
7//! available. All strategies are `no_std` compatible.
8//!
9//! | Strategy | Latency | CPU usage | Best for |
10//! |---|---|---|---|
11//! | `BusySpin` | Lowest (~0 ns wakeup) | 100% core | Dedicated, pinned cores |
12//! | `YieldSpin` | Low (~30 ns on x86) | High | Shared cores, SMT |
13//! | `BackoffSpin` | Medium (exponential) | Decreasing | Background consumers |
14//! | `Adaptive` | Auto-scaling | Varies | General purpose |
15//!
16//! # Platform-specific optimizations
17//!
//! On **aarch64**, `YieldSpin` and `BackoffSpin` use the `WFE` (Wait For
//! Event) instruction instead of `core::hint::spin_loop()` (which maps to
//! `YIELD`). `WFE` puts the core into a low-power state until an event wakes
//! it: `SEV`/`SEVL`, an interrupt, or — only when the exclusive monitor has
//! been armed with `LDXR` — a write to the monitored cache line. This module
//! does not arm the monitor, so a plain publisher store is not by itself
//! guaranteed to generate a wakeup event; in practice wakeups also arrive
//! via interrupts. The `SEVL` + `WFE` pattern is used: `SEVL` sets the local
//! event register so the first `WFE` doesn't block unconditionally.
24//!
25//! On **x86/x86_64**, `core::hint::spin_loop()` emits `PAUSE`, which is the
26//! standard spin-wait hint (~140 cycles on Skylake+).
27//!
28// NOTE: Intel Tremont+ CPUs support UMWAIT/TPAUSE instructions for
29// user-mode cache line monitoring. These would allow near-zero latency
30// wakeup without burning CPU. Not yet implemented — requires CPUID
31// feature detection (WAITPKG) and is only available on recent Intel.
32
/// Strategy for blocking `recv()` and `SubscriberGroup::recv()`.
///
/// All variants are `no_std` compatible — no OS thread primitives required.
/// The [`Default`] strategy is `Adaptive { spin_iters: 64, yield_iters: 64 }`.
///
/// | Strategy | Latency | CPU usage | Best for |
/// |---|---|---|---|
/// | `BusySpin` | Lowest (~0 ns wakeup) | 100% core | Dedicated, pinned cores |
/// | `YieldSpin` | Low (~30 ns on x86) | High | Shared cores, SMT |
/// | `BackoffSpin` | Medium (exponential) | Decreasing | Background consumers |
/// | `Adaptive` | Auto-scaling | Varies | General purpose |
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WaitStrategy {
    /// Pure busy-spin with no PAUSE instruction. Minimum wakeup latency
    /// but consumes 100% of one CPU core. Use on dedicated, pinned cores.
    BusySpin,

    /// Spin with `core::hint::spin_loop()` (PAUSE on x86, YIELD on ARM)
    /// between iterations. Yields the CPU pipeline to the SMT sibling
    /// and reduces power consumption vs `BusySpin`.
    ///
    /// NOTE(review): on aarch64 the implementation actually emits
    /// `SEVL` + `WFE` rather than `spin_loop()` — see `wait()`.
    YieldSpin,

    /// Exponential backoff spin. Starts with bare spins, then escalates
    /// to PAUSE-based spins with increasing delays (the burst doubles per
    /// iteration, capped at 64 hints per call). Good for consumers
    /// that may be idle for extended periods without burning a full core.
    BackoffSpin,

    /// Three-phase escalation: bare spin for `spin_iters` iterations,
    /// then PAUSE-spin for `yield_iters`, then repeated PAUSE bursts
    /// (8 `spin_loop()` hints per call).
    Adaptive {
        /// Number of bare-spin iterations before escalating to PAUSE.
        spin_iters: u32,
        /// Number of PAUSE iterations before entering deep backoff.
        yield_iters: u32,
    },
}
68
69impl Default for WaitStrategy {
70 fn default() -> Self {
71 WaitStrategy::Adaptive {
72 spin_iters: 64,
73 yield_iters: 64,
74 }
75 }
76}
77
78impl WaitStrategy {
79 /// Execute one wait iteration. Called by `recv_with` on each loop when
80 /// `try_recv` returns `Empty`.
81 ///
82 /// `iter` is the zero-based iteration count since the last successful
83 /// receive — it drives phase transitions in `Adaptive` and `BackoffSpin`.
84 #[inline]
85 pub(crate) fn wait(&self, iter: u32) {
86 match self {
87 WaitStrategy::BusySpin => {
88 // No hint — pure busy loop. Fastest wakeup, highest power.
89 }
90 WaitStrategy::YieldSpin => {
91 // On aarch64: SEVL + WFE puts the core into a low-power
92 // state until a cache-line event wakes it. SEVL sets the
93 // local event register so the first WFE returns immediately
94 // (avoids unconditional blocking).
95 // On x86: PAUSE yields the pipeline to the SMT sibling.
96 #[cfg(target_arch = "aarch64")]
97 unsafe {
98 core::arch::asm!("sevl", options(nomem, nostack));
99 core::arch::asm!("wfe", options(nomem, nostack));
100 }
101 #[cfg(not(target_arch = "aarch64"))]
102 core::hint::spin_loop();
103 }
104 WaitStrategy::BackoffSpin => {
105 // Exponential backoff: more iterations as we wait longer.
106 // On aarch64: WFE sleeps until a cache-line event, making
107 // each iteration near-zero power. On x86: PAUSE yields the
108 // pipeline with ~140 cycle delay per iteration.
109 let pauses = 1u32.wrapping_shl(iter.min(6)); // 1, 2, 4, 8, 16, 32, 64
110 for _ in 0..pauses {
111 #[cfg(target_arch = "aarch64")]
112 unsafe {
113 core::arch::asm!("wfe", options(nomem, nostack));
114 }
115 #[cfg(not(target_arch = "aarch64"))]
116 core::hint::spin_loop();
117 }
118 }
119 WaitStrategy::Adaptive {
120 spin_iters,
121 yield_iters,
122 } => {
123 if iter < *spin_iters {
124 // Phase 1: bare spin — fastest wakeup.
125 } else if iter < spin_iters + yield_iters {
126 // Phase 2: PAUSE-spin — yields pipeline.
127 core::hint::spin_loop();
128 } else {
129 // Phase 3: deep backoff — multiple PAUSE per iteration.
130 for _ in 0..8 {
131 core::hint::spin_loop();
132 }
133 }
134 }
135 }
136 }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must resolve to the documented Adaptive(64, 64) strategy.
    #[test]
    fn default_is_adaptive() {
        let expected = WaitStrategy::Adaptive {
            spin_iters: 64,
            yield_iters: 64,
        };
        assert_eq!(WaitStrategy::default(), expected);
    }

    /// BusySpin must return on every iteration without blocking.
    #[test]
    fn busy_spin_returns_immediately() {
        let strategy = WaitStrategy::BusySpin;
        (0..1000).for_each(|iteration| strategy.wait(iteration));
    }

    /// YieldSpin must return on every iteration without blocking.
    #[test]
    fn yield_spin_returns() {
        let strategy = WaitStrategy::YieldSpin;
        (0..100).for_each(|iteration| strategy.wait(iteration));
    }

    /// BackoffSpin must return even once the burst size has saturated.
    #[test]
    fn backoff_spin_returns() {
        let strategy = WaitStrategy::BackoffSpin;
        (0..20).for_each(|iteration| strategy.wait(iteration));
    }

    /// Adaptive must return across all three phases (4 + 4 < 20).
    #[test]
    fn adaptive_phases() {
        let strategy = WaitStrategy::Adaptive {
            spin_iters: 4,
            yield_iters: 4,
        };
        (0..20).for_each(|iteration| strategy.wait(iteration));
    }

    /// The strategy is Copy and Clone, and copies compare equal.
    #[test]
    fn clone_and_copy() {
        let original = WaitStrategy::BusySpin;
        let copied = original;
        #[allow(clippy::clone_on_copy)]
        let cloned = original.clone();
        assert_eq!(original, copied);
        assert_eq!(original, cloned);
    }

    /// Debug output names the variant.
    #[test]
    fn debug_format() {
        use alloc::format;
        let rendered = format!("{:?}", WaitStrategy::BusySpin);
        assert!(rendered.contains("BusySpin"));
    }
}
207}