openentropy_core/sources/microarch/
amx_timing.rs1use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
4#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
5use crate::sources::helpers::{extract_timing_entropy, mach_time};
6
/// Tunable parameters for [`AMXTimingSource`].
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly
/// (for example in tests or when detecting a configuration change).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AMXTimingConfig {
    /// Candidate dimensions `n` for the square `n x n` matrices that are
    /// multiplied. One entry is picked pseudo-randomly for every timed
    /// operation. An empty list makes collection return no bytes.
    pub matrix_sizes: Vec<usize>,

    /// When `true`, a volatile read-modify-write is issued to a
    /// pseudo-randomly chosen byte of a scratch buffer before each timed
    /// multiplication.
    pub interleave_memory_ops: bool,
}
44
45impl Default for AMXTimingConfig {
46 fn default() -> Self {
47 Self {
48 matrix_sizes: vec![16, 32, 48, 64, 96, 128],
49 interleave_memory_ops: true,
50 }
51 }
52}
53
54#[derive(Default)]
79pub struct AMXTimingSource {
80 pub config: AMXTimingConfig,
82}
83
84static AMX_TIMING_INFO: SourceInfo = SourceInfo {
85 name: "amx_timing",
86 description: "Apple AMX coprocessor matrix multiply timing jitter",
87 physics: "Dispatches matrix multiplications to the AMX (Apple Matrix eXtensions) \
88 coprocessor via Accelerate BLAS and measures per-operation timing. The AMX is \
89 a dedicated execution unit with its own pipeline, register file, and memory \
90 paths. Timing depends on: AMX pipeline occupancy from ALL system AMX users, \
91 memory bandwidth contention, AMX power state transitions, and SLC cache state. \
92 Interleaved memory operations disrupt pipeline steady-state for higher \
93 min-entropy. Matrix sizes are randomized via LCG to prevent predictor settling.",
94 category: SourceCategory::Microarch,
95 platform: Platform::MacOS,
96 requirements: &[Requirement::AppleSilicon],
97 entropy_rate_estimate: 1.5,
98 composite: false,
99 is_fast: true,
100};
101
102impl EntropySource for AMXTimingSource {
103 fn info(&self) -> &SourceInfo {
104 &AMX_TIMING_INFO
105 }
106
107 fn is_available(&self) -> bool {
108 cfg!(all(target_os = "macos", target_arch = "aarch64"))
109 }
110
111 fn collect(&self, n_samples: usize) -> Vec<u8> {
112 #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
113 {
114 let _ = n_samples;
115 Vec::new()
116 }
117
118 #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
119 {
120 let raw_count = n_samples + 64;
122 let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
123
124 let sizes = &self.config.matrix_sizes;
125 if sizes.is_empty() {
126 return Vec::new();
127 }
128 let mut lcg: u64 = mach_time() | 1;
129
130 let interleave = self.config.interleave_memory_ops;
131 let mut scratch = if interleave {
132 vec![0u8; 65536]
133 } else {
134 Vec::new()
135 };
136
137 let max_n = *sizes.iter().max().unwrap_or(&128);
139 let max_len = max_n * max_n;
140 let mut a = vec![0.0f32; max_len];
141 let mut b = vec![0.0f32; max_len];
142 let mut c = vec![0.0f32; max_len];
143
144 for _i in 0..raw_count {
145 lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
147 let n = sizes[(lcg >> 32) as usize % sizes.len()];
148 let len = n * n;
149
150 for val in a[..len].iter_mut().chain(b[..len].iter_mut()) {
151 lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
152 *val = (lcg >> 32) as f32 / u32::MAX as f32;
153 }
154
155 if interleave && !scratch.is_empty() {
156 lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
157 let idx = (lcg >> 32) as usize % scratch.len();
158 unsafe {
159 let ptr = scratch.as_mut_ptr().add(idx);
160 std::ptr::write_volatile(ptr, std::ptr::read_volatile(ptr).wrapping_add(1));
161 }
162 }
163
164 let t0 = mach_time();
165 lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
167 let trans_b = if (lcg >> 33) & 1 == 0 { 112 } else { 111 }; unsafe {
172 cblas_sgemm(
173 101, 111, trans_b,
176 n as i32,
177 n as i32,
178 n as i32,
179 1.0,
180 a.as_ptr(),
181 n as i32,
182 b.as_ptr(),
183 n as i32,
184 0.0,
185 c.as_mut_ptr(),
186 n as i32,
187 );
188 }
189
190 let t1 = mach_time();
191 std::hint::black_box(&c);
192 timings.push(t1.wrapping_sub(t0));
193 }
194
195 extract_timing_entropy(&timings, n_samples)
196 }
197 }
198}
199
// CBLAS binding used by `AMXTimingSource::collect`.
//
// Gated on the same target as its only caller so the declaration is not left
// unused on other macOS architectures. The `link` attribute requests the
// Accelerate framework explicitly; it is harmless if the build already links
// it by other means.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
#[link(name = "Accelerate", kind = "framework")]
unsafe extern "C" {
    /// Single-precision general matrix multiply:
    /// `C = alpha * op(A) * op(B) + beta * C`.
    ///
    /// `order`, `transa` and `transb` take the integer values of the CBLAS
    /// layout/transpose enums (101 = row-major, 111 = no transpose,
    /// 112 = transpose).
    ///
    /// # Safety
    ///
    /// `a`, `b` and `c` must point to buffers large enough for the matrices
    /// described by `m`, `n`, `k` and the leading dimensions `lda`, `ldb`,
    /// `ldc`, and must stay valid for the duration of the call. `c` must be
    /// writable and must not alias `a` or `b`.
    fn cblas_sgemm(
        order: i32,
        transa: i32,
        transb: i32,
        m: i32,
        n: i32,
        k: i32,
        alpha: f32,
        a: *const f32,
        lda: i32,
        b: *const f32,
        ldb: i32,
        beta: f32,
        c: *mut f32,
        ldc: i32,
    );
}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a source around an explicit configuration.
    fn source_with(matrix_sizes: Vec<usize>, interleave_memory_ops: bool) -> AMXTimingSource {
        AMXTimingSource {
            config: AMXTimingConfig {
                matrix_sizes,
                interleave_memory_ops,
            },
        }
    }

    /// The metadata record carries the expected name, category and flags.
    #[test]
    fn info() {
        let source = AMXTimingSource::default();
        assert_eq!(source.name(), "amx_timing");

        let meta = source.info();
        assert_eq!(meta.category, SourceCategory::Microarch);
        assert!(!meta.composite);
    }

    /// The default configuration exposes the six stock sizes and interleaving.
    #[test]
    fn default_config() {
        let AMXTimingConfig {
            matrix_sizes,
            interleave_memory_ops,
        } = AMXTimingConfig::default();
        assert_eq!(matrix_sizes, vec![16, 32, 48, 64, 96, 128]);
        assert!(interleave_memory_ops);
    }

    /// A hand-built configuration is stored on the source unchanged.
    #[test]
    fn custom_config() {
        let source = source_with(vec![32, 64], false);
        assert_eq!(source.config.matrix_sizes.len(), 2);
        assert!(!source.config.interleave_memory_ops);
    }

    /// With no matrix sizes configured, collection yields nothing.
    #[test]
    fn empty_sizes_returns_empty() {
        let source = source_with(vec![], false);
        if !source.is_available() {
            return;
        }
        assert!(source.collect(64).is_empty());
    }

    /// Ignored by default; only does real work where the source is available.
    #[test]
    #[ignore]
    fn collects_bytes() {
        let source = AMXTimingSource::default();
        if !source.is_available() {
            return;
        }
        let bytes = source.collect(128);
        assert!(!bytes.is_empty());
        assert!(bytes.len() <= 128);
    }
}