// openentropy_core/sources/microarch/cas_contention.rs
//! Atomic CAS contention — entropy from multi-thread compare-and-swap arbitration.
//!
//! Multiple threads race on atomic CAS operations targeting shared cache lines.
//! The hardware coherence engine's arbitration order is physically nondeterministic.

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::thread;

use crate::source::{EntropySource, Platform, SourceCategory, SourceInfo};
use crate::sources::helpers::{mach_time, xor_fold_u64};

/// Default number of contending worker threads (see `CASContentionConfig`).
const NUM_THREADS: usize = 4;
/// Number of distinct atomic CAS targets the workers pick from.
const NUM_TARGETS: usize = 64;
// Stride between targets, in `AtomicU64` units: 16 * 8 bytes = 128 bytes,
// so each target sits on its own cache line (128 B on Apple Silicon —
// the platform this source's physics description targets).
const TARGET_SPACING: usize = 16; // 16 * 8 bytes = 128 bytes
18
/// Configuration for CAS contention entropy collection.
#[derive(Debug, Clone)]
pub struct CASContentionConfig {
    /// Number of threads to spawn for contention.
    ///
    /// More threads increase contention and entropy quality at the cost of CPU.
    /// A value of `0` makes `collect` return an empty buffer.
    ///
    /// **Default:** `4`
    pub num_threads: usize,
}
29
30impl Default for CASContentionConfig {
31    fn default() -> Self {
32        Self {
33            num_threads: NUM_THREADS,
34        }
35    }
36}
37
/// CAS contention entropy source.
///
/// Spawns multiple threads that perform atomic compare-and-swap on shared
/// targets spread across cache lines. The arbitration timing between threads
/// competing for the same cache line is physically nondeterministic because:
///
/// 1. The cache coherence protocol (MOESI on Apple Silicon) arbitrates
///    concurrent exclusive-access requests nondeterministically.
/// 2. The interconnect fabric latency varies with thermal state and traffic.
/// 3. Each thread's CAS targets are chosen pseudo-randomly, creating
///    unpredictable contention patterns.
/// 4. XOR-combining timings from all threads amplifies the arbitration entropy.
pub struct CASContentionSource {
    // Collection parameters (currently just the worker-thread count).
    config: CASContentionConfig,
}
53
impl CASContentionSource {
    /// Creates a source that collects with the given configuration.
    pub fn new(config: CASContentionConfig) -> Self {
        Self { config }
    }
}
59
60impl Default for CASContentionSource {
61    fn default() -> Self {
62        Self::new(CASContentionConfig::default())
63    }
64}
65
// Static metadata describing this source; returned by `EntropySource::info`.
static CAS_CONTENTION_INFO: SourceInfo = SourceInfo {
    name: "cas_contention",
    description: "Multi-thread atomic CAS arbitration contention jitter",
    physics: "Spawns multiple threads (default 4) performing atomic compare-and-swap operations on \
              shared targets spread across 128-byte-aligned cache lines. The \
              hardware coherence engine (MOESI protocol on Apple Silicon) must \
              arbitrate concurrent exclusive-access requests. This arbitration is \
              physically nondeterministic due to interconnect fabric latency \
              variations, thermal state, and traffic from other cores/devices. \
              XOR-combining timing measurements from all threads amplifies the \
              arbitration entropy.",
    category: SourceCategory::Microarch,
    platform: Platform::Any,
    requirements: &[],
    // NOTE(review): estimated entropy bits per output byte, presumably — confirm
    // the unit against `SourceInfo`'s definition.
    entropy_rate_estimate: 2.0,
    composite: false,
    is_fast: false,
};
84
/// Per-worker output: one CAS round-trip timing per iteration, in
/// `mach_time` ticks.
struct ThreadResult {
    timings: Vec<u64>,
}
88
89impl EntropySource for CASContentionSource {
90    fn info(&self) -> &SourceInfo {
91        &CAS_CONTENTION_INFO
92    }
93
94    fn is_available(&self) -> bool {
95        true
96    }
97
98    fn collect(&self, n_samples: usize) -> Vec<u8> {
99        let samples_per_thread = n_samples * 4 + 64;
100        let nthreads = self.config.num_threads;
101
102        // Allocate contention targets (each on its own cache line).
103        let total_atomics = NUM_TARGETS * TARGET_SPACING;
104        let targets: Arc<Vec<AtomicU64>> =
105            Arc::new((0..total_atomics).map(|_| AtomicU64::new(0)).collect());
106
107        let go = Arc::new(AtomicU64::new(0));
108
109        let mut handles = Vec::with_capacity(nthreads);
110
111        for thread_id in 0..nthreads {
112            let targets = targets.clone();
113            let go = go.clone();
114            let count = samples_per_thread;
115
116            handles.push(thread::spawn(move || {
117                let mut timings = Vec::with_capacity(count);
118                let mut lcg: u64 = mach_time() ^ ((thread_id as u64) << 32) | 1;
119
120                // Wait for go signal.
121                while go.load(Ordering::Acquire) == 0 {
122                    std::hint::spin_loop();
123                }
124
125                for _ in 0..count {
126                    lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
127                    let idx = ((lcg >> 32) as usize % NUM_TARGETS) * TARGET_SPACING;
128
129                    let t0 = mach_time();
130
131                    let expected = targets[idx].load(Ordering::Relaxed);
132                    let _ = targets[idx].compare_exchange_weak(
133                        expected,
134                        expected.wrapping_add(1),
135                        Ordering::AcqRel,
136                        Ordering::Relaxed,
137                    );
138
139                    let t1 = mach_time();
140                    timings.push(t1.wrapping_sub(t0));
141                }
142
143                ThreadResult { timings }
144            }));
145        }
146
147        // Start all threads simultaneously for maximum contention.
148        go.store(1, Ordering::Release);
149
150        // Collect results (threads are bounded by their iteration count).
151        let results: Vec<ThreadResult> = handles
152            .into_iter()
153            .map(|h| h.join().unwrap_or(ThreadResult { timings: vec![] }))
154            .collect();
155
156        // XOR-combine timings from all threads for maximum entropy.
157        let min_len = results.iter().map(|r| r.timings.len()).min().unwrap_or(0);
158        if min_len < 4 {
159            return Vec::new();
160        }
161
162        let mut combined: Vec<u64> = Vec::with_capacity(min_len);
163        for i in 0..min_len {
164            let mut val = 0u64;
165            for result in &results {
166                val ^= result.timings[i];
167            }
168            combined.push(val);
169        }
170
171        // Extract entropy: deltas → XOR adjacent → xor-fold.
172        let deltas: Vec<u64> = combined
173            .windows(2)
174            .map(|w| w[1].wrapping_sub(w[0]))
175            .collect();
176        let xored: Vec<u64> = deltas.windows(2).map(|w| w[0] ^ w[1]).collect();
177        let mut raw: Vec<u8> = xored.iter().map(|&x| xor_fold_u64(x)).collect();
178        raw.truncate(n_samples);
179        raw
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    // Metadata should identify the source correctly.
    #[test]
    fn info() {
        let src = CASContentionSource::default();
        let info = src.info();
        assert_eq!(info.name, "cas_contention");
        assert!(matches!(info.category, SourceCategory::Microarch));
        assert!(!info.composite);
    }

    // A custom thread count must be stored verbatim.
    #[test]
    fn custom_config() {
        let src = CASContentionSource::new(CASContentionConfig { num_threads: 2 });
        assert_eq!(src.config.num_threads, 2);
    }

    #[test]
    #[ignore] // Hardware-dependent: requires multi-core CPU
    fn collects_bytes() {
        let src = CASContentionSource::default();
        assert!(src.is_available());
        let data = src.collect(64);
        assert!(!data.is_empty());
        let distinct: std::collections::HashSet<u8> = data.iter().copied().collect();
        assert!(distinct.len() > 1, "Expected variation in collected bytes");
    }
}