Skip to main content

openentropy_core/sources/microarch/
amx_timing.rs

1//! AMX coprocessor timing — entropy from the Apple Matrix eXtensions unit.
2
3use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
4#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
5use crate::sources::helpers::{extract_timing_entropy, mach_time};
6
/// Configuration for AMX timing entropy collection.
///
/// # Example
/// ```
/// # use openentropy_core::sources::microarch::AMXTimingConfig;
/// // Use defaults (recommended)
/// let config = AMXTimingConfig::default();
///
/// // Or customize
/// let config = AMXTimingConfig {
///     matrix_sizes: vec![32, 128],       // only two sizes
///     interleave_memory_ops: true,
/// };
/// ```
#[derive(Debug, Clone)]
pub struct AMXTimingConfig {
    /// Matrix dimensions to cycle through for SGEMM dispatches.
    ///
    /// Different sizes stress different AMX pipeline configurations:
    /// - Small (16-32): register-bound, fast dispatch
    /// - Medium (48-64): L1-cache-bound
    /// - Large (96-128): L2/SLC-bound, higher memory bandwidth pressure
    ///
    /// Must be non-empty — with an empty list, `collect` returns no bytes.
    /// Each value is used as the M, N, and K dimensions (square matrices).
    ///
    /// **Default:** `[16, 32, 48, 64, 96, 128]`
    pub matrix_sizes: Vec<usize>,

    /// Interleave volatile memory reads/writes between AMX dispatches.
    ///
    /// This thrashes a 64KB scratch buffer between matrix operations, disrupting
    /// the AMX pipeline state and preventing it from settling into a steady-state
    /// pattern. Increases min-entropy at the cost of slightly higher CPU usage.
    ///
    /// **Default:** `true`
    pub interleave_memory_ops: bool,
}
44
45impl Default for AMXTimingConfig {
46    fn default() -> Self {
47        Self {
48            matrix_sizes: vec![16, 32, 48, 64, 96, 128],
49            interleave_memory_ops: true,
50        }
51    }
52}
53
/// Harvests timing jitter from the AMX (Apple Matrix eXtensions) coprocessor.
///
/// # What it measures
/// Nanosecond timing of SGEMM (single-precision matrix multiply) dispatches
/// to the AMX coprocessor via the Accelerate framework's `cblas_sgemm`.
///
/// # Why it's entropic
/// The AMX is a dedicated coprocessor on the Apple Silicon die with its own
/// register file, pipeline, and memory paths. Its timing depends on:
/// - Pipeline occupancy from ALL prior AMX operations (every process)
/// - Memory bandwidth contention on the unified memory controller
/// - Power state transitions (idle → active ramp-up latency)
/// - SLC (System Level Cache) eviction patterns
/// - Thermal throttling affecting AMX frequency independently of CPU cores
///
/// # What makes it unique
/// No prior work has used AMX coprocessor timing as an entropy source. The AMX
/// is a completely independent execution domain from CPU cores, providing
/// entropy that is uncorrelated with CPU-based timing sources.
///
/// # Configuration
/// See [`AMXTimingConfig`] for tunable parameters. Key options:
/// - `interleave_memory_ops`: disrupts pipeline steady-state
/// - `matrix_sizes`: controls which AMX pipeline configurations are exercised
#[derive(Default)]
pub struct AMXTimingSource {
    /// Source configuration. Use `Default::default()` for recommended settings.
    pub config: AMXTimingConfig,
}
83
/// Static metadata for this source; returned by [`EntropySource::info`].
static AMX_TIMING_INFO: SourceInfo = SourceInfo {
    name: "amx_timing",
    description: "Apple AMX coprocessor matrix multiply timing jitter",
    physics: "Dispatches matrix multiplications to the AMX (Apple Matrix eXtensions) \
              coprocessor via Accelerate BLAS and measures per-operation timing. The AMX is \
              a dedicated execution unit with its own pipeline, register file, and memory \
              paths. Timing depends on: AMX pipeline occupancy from ALL system AMX users, \
              memory bandwidth contention, AMX power state transitions, and SLC cache state. \
              Interleaved memory operations disrupt pipeline steady-state for higher \
              min-entropy. Matrix sizes are randomized via LCG to prevent predictor settling.",
    category: SourceCategory::Microarch,
    platform: Platform::MacOS,
    requirements: &[Requirement::AppleSilicon],
    // Conservative estimate of extractable entropy (bits per output byte).
    entropy_rate_estimate: 1.5,
    composite: false,
    is_fast: true,
};
101
impl EntropySource for AMXTimingSource {
    /// Static metadata describing this source.
    fn info(&self) -> &SourceInfo {
        &AMX_TIMING_INFO
    }

    /// Compile-time availability: the AMX path is only built for Apple
    /// Silicon macOS, so `cfg!` is sufficient (no runtime probing needed).
    fn is_available(&self) -> bool {
        cfg!(all(target_os = "macos", target_arch = "aarch64"))
    }

    /// Collects up to `n_samples` entropy bytes from SGEMM dispatch timing.
    ///
    /// Returns an empty vector on non-Apple-Silicon targets or when
    /// `config.matrix_sizes` is empty.
    fn collect(&self, n_samples: usize) -> Vec<u8> {
        // Exactly one of the two cfg'd blocks survives compilation, and the
        // surviving block becomes the tail expression of the function.
        #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
        {
            let _ = n_samples; // silence unused-parameter warning off-platform
            Vec::new()
        }

        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
        {
            // Always use extract_timing_entropy (VN debiasing is too lossy).
            // Oversample by 64 raw timings to give the extractor headroom.
            let raw_count = n_samples + 64;
            let mut timings: Vec<u64> = Vec::with_capacity(raw_count);

            let sizes = &self.config.matrix_sizes;
            if sizes.is_empty() {
                // No sizes configured — nothing to dispatch.
                return Vec::new();
            }
            // 64-bit LCG seeded from the clock; `| 1` guarantees an odd
            // (hence nonzero) seed.
            let mut lcg: u64 = mach_time() | 1;

            let interleave = self.config.interleave_memory_ops;
            // 64KB scratch buffer thrashed between dispatches (see config docs).
            let mut scratch = if interleave {
                vec![0u8; 65536]
            } else {
                Vec::new()
            };

            // Pre-allocate matrices at the maximum size to avoid per-iteration allocation.
            let max_n = *sizes.iter().max().unwrap_or(&128);
            let max_len = max_n * max_n;
            let mut a = vec![0.0f32; max_len];
            let mut b = vec![0.0f32; max_len];
            let mut c = vec![0.0f32; max_len];

            for _i in 0..raw_count {
                // Randomize matrix size via LCG instead of deterministic cycling.
                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                let n = sizes[(lcg >> 32) as usize % sizes.len()];
                let len = n * n;

                // Refill the active n*n prefixes of A and B with pseudo-random
                // values in [0, 1] (upper 32 LCG bits scaled by u32::MAX).
                for val in a[..len].iter_mut().chain(b[..len].iter_mut()) {
                    lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                    *val = (lcg >> 32) as f32 / u32::MAX as f32;
                }

                if interleave && !scratch.is_empty() {
                    lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                    let idx = (lcg >> 32) as usize % scratch.len();
                    // SAFETY: `idx` is reduced modulo `scratch.len()`, so the
                    // pointer stays in-bounds. Volatile read/write prevents the
                    // optimizer from eliding the cache-disrupting touch.
                    unsafe {
                        let ptr = scratch.as_mut_ptr().add(idx);
                        std::ptr::write_volatile(ptr, std::ptr::read_volatile(ptr).wrapping_add(1));
                    }
                }

                let t0 = mach_time();
                // Randomize transpose via LCG instead of deterministic cycling.
                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                let trans_b = if (lcg >> 33) & 1 == 0 { 112 } else { 111 }; // CblasTrans vs CblasNoTrans

                // SAFETY: cblas_sgemm is a well-defined C function from the Accelerate
                // framework. On Apple Silicon, this dispatches to the AMX coprocessor.
                // All matrices are square n*n with n <= max_n, so every buffer is
                // large enough and leading dimension `n` is valid for both layouts.
                unsafe {
                    cblas_sgemm(
                        101, // CblasRowMajor
                        111, // CblasNoTrans
                        trans_b,
                        n as i32,
                        n as i32,
                        n as i32,
                        1.0,
                        a.as_ptr(),
                        n as i32,
                        b.as_ptr(),
                        n as i32,
                        0.0,
                        c.as_mut_ptr(),
                        n as i32,
                    );
                }

                let t1 = mach_time();
                // Keep C observable so the multiply cannot be optimized away.
                std::hint::black_box(&c);
                timings.push(t1.wrapping_sub(t0));
            }

            // Condense the raw timing deltas into at most n_samples bytes.
            extract_timing_entropy(&timings, n_samples)
        }
    }
}
199
// Accelerate framework CBLAS binding (Apple-provided, always available on macOS).
#[cfg(target_os = "macos")]
unsafe extern "C" {
    /// Single-precision general matrix multiply: C = alpha*op(A)*op(B) + beta*C.
    ///
    /// `order`/`transa`/`transb` take CBLAS enum integer values as used at the
    /// call site above (101 = CblasRowMajor, 111 = CblasNoTrans, 112 = CblasTrans).
    /// `lda`/`ldb`/`ldc` are the leading dimensions of the respective matrices.
    fn cblas_sgemm(
        order: i32,
        transa: i32,
        transb: i32,
        m: i32,
        n: i32,
        k: i32,
        alpha: f32,
        a: *const f32,
        lda: i32,
        b: *const f32,
        ldb: i32,
        beta: f32,
        c: *mut f32,
        ldc: i32,
    );
}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Metadata should identify the source by name and category.
    #[test]
    fn info() {
        let source = AMXTimingSource::default();
        assert_eq!("amx_timing", source.name());
        let info = source.info();
        assert_eq!(SourceCategory::Microarch, info.category);
        assert!(!info.composite);
    }

    /// Default configuration must match the documented values.
    #[test]
    fn default_config() {
        let AMXTimingConfig {
            matrix_sizes,
            interleave_memory_ops,
        } = AMXTimingConfig::default();
        assert_eq!(matrix_sizes, [16, 32, 48, 64, 96, 128]);
        assert!(interleave_memory_ops);
    }

    /// A hand-built configuration is stored verbatim on the source.
    #[test]
    fn custom_config() {
        let config = AMXTimingConfig {
            matrix_sizes: vec![32, 64],
            interleave_memory_ops: false,
        };
        let source = AMXTimingSource { config };
        assert_eq!(2, source.config.matrix_sizes.len());
        assert!(!source.config.interleave_memory_ops);
    }

    /// An empty size list makes `collect` bail out with zero bytes.
    #[test]
    fn empty_sizes_returns_empty() {
        let source = AMXTimingSource {
            config: AMXTimingConfig {
                matrix_sizes: Vec::new(),
                interleave_memory_ops: false,
            },
        };
        if source.is_available() {
            assert_eq!(0, source.collect(64).len());
        }
    }

    /// On Apple Silicon macOS, collection yields at most `n_samples` bytes.
    #[test]
    #[ignore] // Requires macOS aarch64
    fn collects_bytes() {
        let source = AMXTimingSource::default();
        if !source.is_available() {
            return;
        }
        let data = source.collect(128);
        assert!(!data.is_empty());
        assert!(data.len() <= 128);
    }
}