Skip to main content

openentropy_core/sources/frontier/
amx_timing.rs

1//! AMX coprocessor timing — entropy from the Apple Matrix eXtensions unit.
2
3use crate::source::{EntropySource, SourceCategory, SourceInfo};
4use crate::sources::helpers::{extract_timing_entropy, mach_time};
5
6use super::extract_timing_entropy_debiased;
7
/// Configuration for AMX timing entropy collection.
///
/// Two configurations compare equal when all three fields are equal, which
/// makes it easy to assert on a whole configuration in tests.
///
/// # Example
/// ```
/// # use openentropy_core::sources::frontier::AMXTimingConfig;
/// // Use defaults (recommended)
/// let config = AMXTimingConfig::default();
///
/// // Or customize
/// let config = AMXTimingConfig {
///     matrix_sizes: vec![32, 128],       // only two sizes
///     interleave_memory_ops: true,
///     von_neumann_debias: true,
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AMXTimingConfig {
    /// Matrix dimensions to cycle through for SGEMM dispatches.
    ///
    /// Different sizes stress different AMX pipeline configurations:
    /// - Small (16-32): register-bound, fast dispatch
    /// - Medium (48-64): L1-cache-bound
    /// - Large (96-128): L2/SLC-bound, higher memory bandwidth pressure
    ///
    /// Must be non-empty; collection returns an empty buffer when no sizes
    /// are configured. Each value is used for all three of the M, N, and K
    /// dimensions, i.e. every multiplication is between square matrices.
    ///
    /// **Default:** `[16, 32, 48, 64, 96, 128]`
    pub matrix_sizes: Vec<usize>,

    /// Interleave volatile memory reads/writes between AMX dispatches.
    ///
    /// Before each matrix operation, one pseudo-randomly chosen byte of a
    /// 64 KiB scratch buffer is read and written back with volatile accesses.
    /// This is intended to disrupt the AMX pipeline state and keep it from
    /// settling into a steady-state pattern, increasing min-entropy at the
    /// cost of slightly higher CPU usage.
    ///
    /// **Default:** `true`
    pub interleave_memory_ops: bool,

    /// Apply Von Neumann debiasing to raw timing deltas.
    ///
    /// The AMX timing source has severe bias (Shannon 6.985 but H∞ only 0.379).
    /// Von Neumann debiasing pairs consecutive deltas and discards equal pairs,
    /// emitting one unbiased bit per unequal pair. This costs ~50% of the raw
    /// data but dramatically improves min-entropy.
    ///
    /// **Default:** `true`
    pub von_neumann_debias: bool,
}
56
57impl Default for AMXTimingConfig {
58    fn default() -> Self {
59        Self {
60            matrix_sizes: vec![16, 32, 48, 64, 96, 128],
61            interleave_memory_ops: true,
62            von_neumann_debias: true,
63        }
64    }
65}
66
67/// Harvests timing jitter from the AMX (Apple Matrix eXtensions) coprocessor.
68///
69/// # What it measures
70/// Nanosecond timing of SGEMM (single-precision matrix multiply) dispatches
71/// to the AMX coprocessor via the Accelerate framework's `cblas_sgemm`.
72///
73/// # Why it's entropic
74/// The AMX is a dedicated coprocessor on the Apple Silicon die with its own
75/// register file, pipeline, and memory paths. Its timing depends on:
76/// - Pipeline occupancy from ALL prior AMX operations (every process)
77/// - Memory bandwidth contention on the unified memory controller
78/// - Power state transitions (idle → active ramp-up latency)
79/// - SLC (System Level Cache) eviction patterns
80/// - Thermal throttling affecting AMX frequency independently of CPU cores
81///
82/// # What makes it unique
83/// No prior work has used AMX coprocessor timing as an entropy source. The AMX
84/// is a completely independent execution domain from CPU cores, providing
85/// entropy that is uncorrelated with CPU-based timing sources.
86///
87/// # Configuration
88/// See [`AMXTimingConfig`] for tunable parameters. Key options:
89/// - `von_neumann_debias`: fixes severe LSB bias (recommended: `true`)
90/// - `interleave_memory_ops`: disrupts pipeline steady-state
91/// - `matrix_sizes`: controls which AMX pipeline configurations are exercised
92#[derive(Default)]
93pub struct AMXTimingSource {
94    /// Source configuration. Use `Default::default()` for recommended settings.
95    pub config: AMXTimingConfig,
96}
97
98static AMX_TIMING_INFO: SourceInfo = SourceInfo {
99    name: "amx_timing",
100    description: "Apple AMX coprocessor matrix multiply timing jitter (debiased)",
101    physics: "Dispatches matrix multiplications to the AMX (Apple Matrix eXtensions) \
102              coprocessor via Accelerate BLAS and measures per-operation timing. The AMX is \
103              a dedicated execution unit with its own pipeline, register file, and memory \
104              paths. Timing depends on: AMX pipeline occupancy from ALL system AMX users, \
105              memory bandwidth contention, AMX power state transitions, and SLC cache state. \
106              Von Neumann debiasing corrects heavy LSB bias. Interleaved memory operations \
107              disrupt pipeline steady-state for higher min-entropy.",
108    category: SourceCategory::Frontier,
109    platform_requirements: &["macos"],
110    entropy_rate_estimate: 2500.0,
111    composite: false,
112};
113
114impl EntropySource for AMXTimingSource {
115    fn info(&self) -> &SourceInfo {
116        &AMX_TIMING_INFO
117    }
118
119    fn is_available(&self) -> bool {
120        cfg!(all(target_os = "macos", target_arch = "aarch64"))
121    }
122
123    fn collect(&self, n_samples: usize) -> Vec<u8> {
124        let debias = self.config.von_neumann_debias;
125        let raw_count = if debias {
126            n_samples * 8 + 128
127        } else {
128            n_samples * 4 + 64
129        };
130        let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
131
132        let sizes = &self.config.matrix_sizes;
133        if sizes.is_empty() {
134            return Vec::new();
135        }
136        let mut lcg: u64 = mach_time() | 1;
137
138        let interleave = self.config.interleave_memory_ops;
139        let mut scratch = if interleave {
140            vec![0u8; 65536]
141        } else {
142            Vec::new()
143        };
144
145        // Pre-allocate matrices at the maximum size to avoid per-iteration allocation.
146        let max_n = *sizes.iter().max().unwrap_or(&128);
147        let max_len = max_n * max_n;
148        let mut a = vec![0.0f32; max_len];
149        let mut b = vec![0.0f32; max_len];
150        let mut c = vec![0.0f32; max_len];
151
152        for i in 0..raw_count {
153            let n = sizes[i % sizes.len()];
154            let len = n * n;
155
156            for val in a[..len].iter_mut().chain(b[..len].iter_mut()) {
157                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
158                *val = (lcg >> 32) as f32 / u32::MAX as f32;
159            }
160
161            if interleave && !scratch.is_empty() {
162                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
163                let idx = (lcg >> 32) as usize % scratch.len();
164                unsafe {
165                    let ptr = scratch.as_mut_ptr().add(idx);
166                    std::ptr::write_volatile(ptr, std::ptr::read_volatile(ptr).wrapping_add(1));
167                }
168            }
169
170            let t0 = mach_time();
171            let trans_b = if i % 3 == 1 { 112 } else { 111 }; // CblasTrans vs CblasNoTrans
172
173            // SAFETY: cblas_sgemm is a well-defined C function from the Accelerate
174            // framework. On Apple Silicon, this dispatches to the AMX coprocessor.
175            unsafe {
176                cblas_sgemm(
177                    101, // CblasRowMajor
178                    111, // CblasNoTrans
179                    trans_b,
180                    n as i32,
181                    n as i32,
182                    n as i32,
183                    1.0,
184                    a.as_ptr(),
185                    n as i32,
186                    b.as_ptr(),
187                    n as i32,
188                    0.0,
189                    c.as_mut_ptr(),
190                    n as i32,
191                );
192            }
193
194            let t1 = mach_time();
195            std::hint::black_box(&c);
196            timings.push(t1.wrapping_sub(t0));
197        }
198
199        if debias {
200            extract_timing_entropy_debiased(&timings, n_samples)
201        } else {
202            extract_timing_entropy(&timings, n_samples)
203        }
204    }
205}
206
// Accelerate framework CBLAS binding (Apple-provided, always available on macOS).
// The `link` attribute makes the framework dependency explicit on macOS; it is
// gated because the `framework` link kind is only accepted for Apple targets.
#[cfg_attr(target_os = "macos", link(name = "Accelerate", kind = "framework"))]
unsafe extern "C" {
    /// Single-precision general matrix multiply:
    /// `C = alpha * op(A) * op(B) + beta * C`.
    ///
    /// - `order`: storage layout (`101` = `CblasRowMajor`).
    /// - `transa` / `transb`: whether `A` / `B` is used as-is (`111`,
    ///   `CblasNoTrans`) or transposed (`112`, `CblasTrans`).
    /// - `m`, `n`, `k`: `op(A)` is `m x k`, `op(B)` is `k x n`, `C` is `m x n`.
    /// - `alpha`, `beta`: scalar multipliers.
    /// - `a`, `b`, `c`: pointers to the matrix storage.
    /// - `lda`, `ldb`, `ldc`: leading dimensions of the respective matrices.
    ///
    /// # Safety
    /// `a`, `b` and `c` must point to buffers large enough for the given
    /// dimensions and leading dimensions, `c` must be valid for writes, and
    /// the dimension arguments must satisfy the CBLAS constraints.
    fn cblas_sgemm(
        order: i32,
        transa: i32,
        transb: i32,
        m: i32,
        n: i32,
        k: i32,
        alpha: f32,
        a: *const f32,
        lda: i32,
        b: *const f32,
        ldb: i32,
        beta: f32,
        c: *mut f32,
        ldc: i32,
    );
}
226
/// Unit tests for the AMX timing source.
///
/// Tests that dispatch real SGEMM work are `#[ignore]`d because they need
/// macOS on aarch64; the remaining tests only inspect metadata and
/// configuration handling.
#[cfg(test)]
mod tests {
    use super::*;

    /// The static descriptor exposes the expected identity.
    #[test]
    fn info() {
        let src = AMXTimingSource::default();
        assert_eq!(src.name(), "amx_timing");
        assert_eq!(src.info().category, SourceCategory::Frontier);
        assert!(!src.info().composite);
    }

    /// `Default` yields the documented recommended settings.
    #[test]
    fn default_config() {
        let config = AMXTimingConfig::default();
        assert_eq!(config.matrix_sizes, vec![16, 32, 48, 64, 96, 128]);
        assert!(config.interleave_memory_ops);
        assert!(config.von_neumann_debias);
    }

    /// Every field of a hand-built configuration is stored as given.
    #[test]
    fn custom_config() {
        let src = AMXTimingSource {
            config: AMXTimingConfig {
                matrix_sizes: vec![32, 64],
                interleave_memory_ops: false,
                von_neumann_debias: false,
            },
        };
        assert_eq!(src.config.matrix_sizes, vec![32, 64]);
        assert!(!src.config.interleave_memory_ops);
        assert!(!src.config.von_neumann_debias);
    }

    /// With no matrix sizes configured, `collect` returns before touching the
    /// clock or the coprocessor, so this runs regardless of availability.
    #[test]
    fn empty_sizes_returns_empty() {
        let src = AMXTimingSource {
            config: AMXTimingConfig {
                matrix_sizes: vec![],
                interleave_memory_ops: false,
                von_neumann_debias: false,
            },
        };
        assert!(src.collect(64).is_empty());
    }

    /// The default (debiased) path produces some output, capped at the request.
    #[test]
    #[ignore = "requires macOS on aarch64"]
    fn collects_bytes() {
        let src = AMXTimingSource::default();
        if src.is_available() {
            let data = src.collect(128);
            assert!(!data.is_empty());
            assert!(data.len() <= 128);
        }
    }

    /// The non-debiased path produces some output as well.
    #[test]
    #[ignore = "requires macOS on aarch64"]
    fn no_debias_collects_bytes() {
        let src = AMXTimingSource {
            config: AMXTimingConfig {
                von_neumann_debias: false,
                ..AMXTimingConfig::default()
            },
        };
        if src.is_available() {
            assert!(!src.collect(64).is_empty());
        }
    }
}