Skip to main content

openentropy_core/sources/frontier/
amx_timing.rs

1//! AMX coprocessor timing — entropy from the Apple Matrix eXtensions unit.
2
3use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
4#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
5use crate::sources::helpers::{extract_timing_entropy, mach_time};
6
7#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
8use super::extract_timing_entropy_debiased;
9
/// Configuration for AMX timing entropy collection.
///
/// Configurations can be cloned, debug-printed, and compared for equality.
///
/// # Example
/// ```
/// # use openentropy_core::sources::frontier::AMXTimingConfig;
/// // Use defaults (recommended)
/// let config = AMXTimingConfig::default();
///
/// // Or customize
/// let config = AMXTimingConfig {
///     matrix_sizes: vec![32, 128],       // only two sizes
///     interleave_memory_ops: true,
///     von_neumann_debias: true,
/// };
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AMXTimingConfig {
    /// Matrix dimensions to cycle through for SGEMM dispatches.
    ///
    /// Collection walks this list round-robin, one entry per timed dispatch.
    /// Different sizes stress different AMX pipeline configurations:
    /// - Small (16-32): register-bound, fast dispatch
    /// - Medium (48-64): L1-cache-bound
    /// - Large (96-128): L2/SLC-bound, higher memory bandwidth pressure
    ///
    /// Each value is used for all of the M, N, and K dimensions, i.e. every
    /// multiplication is square. An empty list makes collection return no bytes.
    ///
    /// **Default:** `[16, 32, 48, 64, 96, 128]`
    pub matrix_sizes: Vec<usize>,

    /// Interleave a volatile memory access between AMX dispatches.
    ///
    /// When enabled, one byte at a pseudo-random offset inside a 64 KiB scratch
    /// buffer is read and written back (incremented) with volatile accesses
    /// before every timed dispatch. The intent is to disturb the memory system
    /// so the AMX pipeline does not settle into a steady-state pattern, at the
    /// cost of slightly more work per sample.
    ///
    /// **Default:** `true`
    pub interleave_memory_ops: bool,

    /// Apply Von Neumann debiasing to raw timing deltas.
    ///
    /// The AMX timing source has severe bias. When enabled, the timings are
    /// handed to the debiasing extractor instead of the plain one, and twice as
    /// many raw timings are gathered per requested byte (8 instead of 4) to
    /// make up for the samples debiasing throws away.
    ///
    /// **Default:** `true`
    pub von_neumann_debias: bool,
}
58
59impl Default for AMXTimingConfig {
60    fn default() -> Self {
61        Self {
62            matrix_sizes: vec![16, 32, 48, 64, 96, 128],
63            interleave_memory_ops: true,
64            von_neumann_debias: true,
65        }
66    }
67}
68
69/// Harvests timing jitter from the AMX (Apple Matrix eXtensions) coprocessor.
70///
71/// # What it measures
72/// Nanosecond timing of SGEMM (single-precision matrix multiply) dispatches
73/// to the AMX coprocessor via the Accelerate framework's `cblas_sgemm`.
74///
75/// # Why it's entropic
76/// The AMX is a dedicated coprocessor on the Apple Silicon die with its own
77/// register file, pipeline, and memory paths. Its timing depends on:
78/// - Pipeline occupancy from ALL prior AMX operations (every process)
79/// - Memory bandwidth contention on the unified memory controller
80/// - Power state transitions (idle → active ramp-up latency)
81/// - SLC (System Level Cache) eviction patterns
82/// - Thermal throttling affecting AMX frequency independently of CPU cores
83///
84/// # What makes it unique
85/// No prior work has used AMX coprocessor timing as an entropy source. The AMX
86/// is a completely independent execution domain from CPU cores, providing
87/// entropy that is uncorrelated with CPU-based timing sources.
88///
89/// # Configuration
90/// See [`AMXTimingConfig`] for tunable parameters. Key options:
91/// - `von_neumann_debias`: fixes severe LSB bias (recommended: `true`)
92/// - `interleave_memory_ops`: disrupts pipeline steady-state
93/// - `matrix_sizes`: controls which AMX pipeline configurations are exercised
94#[derive(Default)]
95pub struct AMXTimingSource {
96    /// Source configuration. Use `Default::default()` for recommended settings.
97    pub config: AMXTimingConfig,
98}
99
100static AMX_TIMING_INFO: SourceInfo = SourceInfo {
101    name: "amx_timing",
102    description: "Apple AMX coprocessor matrix multiply timing jitter (debiased)",
103    physics: "Dispatches matrix multiplications to the AMX (Apple Matrix eXtensions) \
104              coprocessor via Accelerate BLAS and measures per-operation timing. The AMX is \
105              a dedicated execution unit with its own pipeline, register file, and memory \
106              paths. Timing depends on: AMX pipeline occupancy from ALL system AMX users, \
107              memory bandwidth contention, AMX power state transitions, and SLC cache state. \
108              Von Neumann debiasing corrects heavy LSB bias. Interleaved memory operations \
109              disrupt pipeline steady-state for higher min-entropy.",
110    category: SourceCategory::Microarch,
111    platform: Platform::MacOS,
112    requirements: &[Requirement::AppleSilicon],
113    entropy_rate_estimate: 2500.0,
114    composite: false,
115};
116
117impl EntropySource for AMXTimingSource {
118    fn info(&self) -> &SourceInfo {
119        &AMX_TIMING_INFO
120    }
121
122    fn is_available(&self) -> bool {
123        cfg!(all(target_os = "macos", target_arch = "aarch64"))
124    }
125
126    fn collect(&self, n_samples: usize) -> Vec<u8> {
127        #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
128        {
129            let _ = n_samples;
130            Vec::new()
131        }
132
133        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
134        {
135            let debias = self.config.von_neumann_debias;
136            let raw_count = if debias {
137                n_samples * 8 + 128
138            } else {
139                n_samples * 4 + 64
140            };
141            let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
142
143            let sizes = &self.config.matrix_sizes;
144            if sizes.is_empty() {
145                return Vec::new();
146            }
147            let mut lcg: u64 = mach_time() | 1;
148
149            let interleave = self.config.interleave_memory_ops;
150            let mut scratch = if interleave {
151                vec![0u8; 65536]
152            } else {
153                Vec::new()
154            };
155
156            // Pre-allocate matrices at the maximum size to avoid per-iteration allocation.
157            let max_n = *sizes.iter().max().unwrap_or(&128);
158            let max_len = max_n * max_n;
159            let mut a = vec![0.0f32; max_len];
160            let mut b = vec![0.0f32; max_len];
161            let mut c = vec![0.0f32; max_len];
162
163            for i in 0..raw_count {
164                let n = sizes[i % sizes.len()];
165                let len = n * n;
166
167                for val in a[..len].iter_mut().chain(b[..len].iter_mut()) {
168                    lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
169                    *val = (lcg >> 32) as f32 / u32::MAX as f32;
170                }
171
172                if interleave && !scratch.is_empty() {
173                    lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
174                    let idx = (lcg >> 32) as usize % scratch.len();
175                    unsafe {
176                        let ptr = scratch.as_mut_ptr().add(idx);
177                        std::ptr::write_volatile(ptr, std::ptr::read_volatile(ptr).wrapping_add(1));
178                    }
179                }
180
181                let t0 = mach_time();
182                let trans_b = if i % 3 == 1 { 112 } else { 111 }; // CblasTrans vs CblasNoTrans
183
184                // SAFETY: cblas_sgemm is a well-defined C function from the Accelerate
185                // framework. On Apple Silicon, this dispatches to the AMX coprocessor.
186                unsafe {
187                    cblas_sgemm(
188                        101, // CblasRowMajor
189                        111, // CblasNoTrans
190                        trans_b,
191                        n as i32,
192                        n as i32,
193                        n as i32,
194                        1.0,
195                        a.as_ptr(),
196                        n as i32,
197                        b.as_ptr(),
198                        n as i32,
199                        0.0,
200                        c.as_mut_ptr(),
201                        n as i32,
202                    );
203                }
204
205                let t1 = mach_time();
206                std::hint::black_box(&c);
207                timings.push(t1.wrapping_sub(t0));
208            }
209
210            if debias {
211                extract_timing_entropy_debiased(&timings, n_samples)
212            } else {
213                extract_timing_entropy(&timings, n_samples)
214            }
215        }
216    }
217}
218
// Accelerate framework CBLAS binding (Apple-provided, always available on macOS).
//
// Gated on the same condition as its only call site (the macOS/aarch64 branch
// of `collect`), so the declaration is not left unused on Intel macOS builds.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
unsafe extern "C" {
    /// Single-precision general matrix multiply:
    /// `C ← alpha · op(A) · op(B) + beta · C`.
    ///
    /// - `order`: storage layout (`101` = row-major).
    /// - `transa` / `transb`: whether `A` / `B` is used as-is (`111`) or
    ///   transposed (`112`).
    /// - `m`, `n`, `k`: `op(A)` is `m × k`, `op(B)` is `k × n`, `C` is `m × n`.
    /// - `lda`, `ldb`, `ldc`: leading dimensions of `a`, `b`, `c`.
    ///
    /// # Safety
    /// `a`, `b` and `c` must point to buffers large enough for the matrix
    /// shapes described by the dimension and leading-dimension arguments, and
    /// `c` must be valid for writes.
    fn cblas_sgemm(
        order: i32,
        transa: i32,
        transb: i32,
        m: i32,
        n: i32,
        k: i32,
        alpha: f32,
        a: *const f32,
        lda: i32,
        b: *const f32,
        ldb: i32,
        beta: f32,
        c: *mut f32,
        ldc: i32,
    );
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a configuration in a source so each test only spells out its settings.
    fn source_with(config: AMXTimingConfig) -> AMXTimingSource {
        AMXTimingSource { config }
    }

    /// The metadata record carries the expected name, category and composite flag.
    #[test]
    fn info() {
        let source = AMXTimingSource::default();
        assert_eq!(source.name(), "amx_timing");

        let meta = source.info();
        assert_eq!(meta.category, SourceCategory::Microarch);
        assert!(!meta.composite);
    }

    /// `Default` yields the documented recommended settings.
    #[test]
    fn default_config() {
        let defaults = AMXTimingConfig::default();
        assert_eq!(defaults.matrix_sizes, [16, 32, 48, 64, 96, 128]);
        assert!(defaults.interleave_memory_ops);
        assert!(defaults.von_neumann_debias);
    }

    /// A hand-built configuration is stored on the source unchanged.
    #[test]
    fn custom_config() {
        let source = source_with(AMXTimingConfig {
            matrix_sizes: vec![32, 64],
            interleave_memory_ops: false,
            von_neumann_debias: false,
        });
        assert_eq!(source.config.matrix_sizes.len(), 2);
        assert!(!source.config.interleave_memory_ops);
    }

    /// With no matrix sizes configured, collection produces no bytes.
    #[test]
    fn empty_sizes_returns_empty() {
        let source = source_with(AMXTimingConfig {
            matrix_sizes: Vec::new(),
            interleave_memory_ops: false,
            von_neumann_debias: false,
        });
        if !source.is_available() {
            return;
        }
        assert!(source.collect(64).is_empty());
    }

    /// The default source yields a non-empty result no longer than requested.
    #[test]
    #[ignore] // Requires macOS aarch64
    fn collects_bytes() {
        let source = AMXTimingSource::default();
        if !source.is_available() {
            return;
        }
        let bytes = source.collect(128);
        assert!(!bytes.is_empty());
        assert!(bytes.len() <= 128);
    }

    /// Collection still yields bytes when debiasing is switched off.
    #[test]
    #[ignore] // Requires macOS aarch64
    fn no_debias_collects_bytes() {
        let source = source_with(AMXTimingConfig {
            von_neumann_debias: false,
            ..AMXTimingConfig::default()
        });
        if !source.is_available() {
            return;
        }
        assert!(!source.collect(64).is_empty());
    }
}