1use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
18#[cfg(target_os = "macos")]
19use crate::sources::helpers::extract_timing_entropy;
20#[cfg(target_os = "macos")]
21use crate::sources::helpers::mach_time;
22#[cfg(target_os = "macos")]
23use crate::sources::helpers::xor_fold_u64;
24#[cfg(target_os = "macos")]
25use std::sync::OnceLock;
26
27static GPU_DIVERGENCE_INFO: SourceInfo = SourceInfo {
28 name: "gpu_divergence",
29 description: "GPU shader thread execution order divergence entropy",
30 physics: "Dispatches Metal compute shaders where parallel threads race to atomically \
31 increment a shared counter. The execution order captures GPU scheduling \
32 nondeterminism from: SIMD group divergence on conditional branches, memory \
33 coalescing failures, L2 cache bank conflicts, thermal-dependent GPU clock \
34 frequency variation, and warp scheduler arbitration. Each dispatch produces \
35 a different execution ordering due to physical nondeterminism in the GPU.",
36 category: SourceCategory::GPU,
37 platform: Platform::MacOS,
38 requirements: &[Requirement::Metal],
39 entropy_rate_estimate: 4.0,
40 composite: false,
41 is_fast: true,
42};
43
/// Zero-sized handle for the GPU divergence entropy source; all behavior
/// lives in its `EntropySource` impl (Metal-backed, macOS only).
pub struct GPUDivergenceSource;
46
#[cfg(target_os = "macos")]
mod metal {
    //! Minimal hand-rolled Objective-C runtime / Metal FFI.
    //!
    //! Drives Metal directly through `objc_msgSend` (no binding crate) to
    //! compile and dispatch the divergence compute shader.

    use std::ffi::{CString, c_void};

    /// Objective-C object pointer (`id`).
    type Id = *mut c_void;
    /// Objective-C selector (`SEL`).
    type Sel = *mut c_void;
    /// Objective-C class pointer (`Class`).
    type Class = *mut c_void;

    #[link(name = "objc", kind = "dylib")]
    unsafe extern "C" {
        fn objc_getClass(name: *const i8) -> Class;
        fn sel_registerName(name: *const i8) -> Sel;
        fn objc_msgSend(receiver: Id, sel: Sel, ...) -> Id;
    }

    #[link(name = "Metal", kind = "framework")]
    unsafe extern "C" {
        fn MTLCreateSystemDefaultDevice() -> Id;
    }

    /// Number of GPU threads racing on the shared atomic counter per dispatch.
    const THREADS: u32 = 256;

    /// Compute kernel: each thread performs data-dependent branching work and
    /// then records the order in which it won the atomic-increment race.
    const SHADER_SOURCE: &str = r#"
#include <metal_stdlib>
using namespace metal;
kernel void divergence(
    device atomic_uint *counter [[buffer(0)]],
    device uint *output [[buffer(1)]],
    uint tid [[thread_position_in_grid]]
) {
    // Data-dependent work to create divergence.
    uint val = tid;
    for (uint i = 0; i < 16; i++) {
        if (val & 1) { val = val * 3 + 1; }
        else { val = val >> 1; }
    }
    // Atomic increment — order captures scheduling nondeterminism.
    uint order = atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
    output[tid] = order ^ val;
}
"#;

    /// Owned Metal objects needed for repeated dispatches. Every field is
    /// obtained from a `new*`/create call and released in `Drop`.
    pub struct MetalState {
        _device: Id,
        queue: Id,
        pipeline: Id,
        counter_buf: Id,
        output_buf: Id,
    }

    // SAFETY: the wrapped pointers are Objective-C/Metal objects owned
    // exclusively by this struct. NOTE(review): moving them across threads is
    // assumed safe for the call patterns used here — confirm against Metal's
    // threading documentation.
    unsafe impl Send for MetalState {}

    /// Casts the untyped variadic `objc_msgSend` to a concrete fn type so
    /// each message is invoked with the correct argument ABI.
    macro_rules! msg_send_fn {
        ($ty:ty) => {
            std::mem::transmute::<*const (), $ty>(objc_msgSend as *const ())
        };
    }

    impl MetalState {
        /// Creates the default device, a command queue, the compiled compute
        /// pipeline, and the two shared buffers.
        ///
        /// Returns `None` on any failure, releasing everything acquired so far.
        pub fn new() -> Option<Self> {
            unsafe {
                let device = MTLCreateSystemDefaultDevice();
                if device.is_null() {
                    return None;
                }

                let queue = msg_send(device, "newCommandQueue");
                if queue.is_null() {
                    objc_release(device);
                    return None;
                }

                let pipeline = match compile_shader(device) {
                    Some(p) => p,
                    None => {
                        objc_release(queue);
                        objc_release(device);
                        return None;
                    }
                };

                // 4 bytes for the atomic counter; 4 output bytes per thread.
                let counter_buf = new_buffer(device, 4);
                let output_buf = new_buffer(device, THREADS as u64 * 4);
                if counter_buf.is_null() || output_buf.is_null() {
                    // `objc_release` is a no-op on null, covering both
                    // partial-failure cases without separate guards.
                    objc_release(output_buf);
                    objc_release(counter_buf);
                    objc_release(pipeline);
                    objc_release(queue);
                    objc_release(device);
                    return None;
                }

                Some(MetalState {
                    _device: device,
                    queue,
                    pipeline,
                    counter_buf,
                    output_buf,
                })
            }
        }

        /// Runs one compute dispatch and returns the `THREADS` per-thread
        /// output words, or `None` if any intermediate Metal object is null.
        pub fn dispatch(&self) -> Option<Vec<u32>> {
            unsafe {
                // Reset the shared counter the shader threads race on.
                let counter_ptr = msg_send(self.counter_buf, "contents") as *mut u32;
                if counter_ptr.is_null() {
                    return None;
                }
                *counter_ptr = 0;

                // NOTE(review): `commandBuffer` / `computeCommandEncoder`
                // return autoreleased objects; this relies on an autorelease
                // pool existing on the calling thread — confirm for threads
                // not managed by the Objective-C runtime.
                let cmd_buf = msg_send(self.queue, "commandBuffer");
                if cmd_buf.is_null() {
                    return None;
                }

                let encoder = msg_send(cmd_buf, "computeCommandEncoder");
                if encoder.is_null() {
                    return None;
                }

                let sel_set_pipeline = sel("setComputePipelineState:");
                msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id))(
                    encoder,
                    sel_set_pipeline,
                    self.pipeline,
                );

                set_buffer(encoder, self.counter_buf, 0, 0);
                set_buffer(encoder, self.output_buf, 0, 1);

                dispatch_threads_1d(encoder, THREADS, THREADS.min(256));

                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(encoder, sel("endEncoding"));
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("commit"));
                // Block until the GPU finishes so `output_buf` is coherent.
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("waitUntilCompleted"));

                let output_ptr = msg_send(self.output_buf, "contents") as *const u32;
                if output_ptr.is_null() {
                    return None;
                }
                let mut result = vec![0u32; THREADS as usize];
                std::ptr::copy_nonoverlapping(output_ptr, result.as_mut_ptr(), THREADS as usize);
                Some(result)
            }
        }
    }

    /// Interns an Objective-C selector. The `expect` can only fire on an
    /// interior NUL, which would be a programming error in this module.
    unsafe fn sel(name: &str) -> Sel {
        let c_name = CString::new(name).expect("selector contains null byte");
        unsafe { sel_registerName(c_name.as_ptr()) }
    }

    /// Sends a zero-argument message that returns an object pointer.
    unsafe fn msg_send(obj: Id, sel_name: &str) -> Id {
        unsafe {
            let s = sel(sel_name);
            msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(obj, s)
        }
    }

    /// Builds an owned `NSString` from `s` via alloc + initWithBytes.
    unsafe fn nsstring(s: &str) -> Id {
        unsafe {
            let cls = objc_getClass(c"NSString".as_ptr());
            let sel_alloc = sel("alloc");
            let sel_init = sel("initWithBytes:length:encoding:");

            let raw = msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(cls as Id, sel_alloc);
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, *const u8, u64, u64) -> Id)(
                raw,
                sel_init,
                s.as_ptr(),
                s.len() as u64,
                4, // NSUTF8StringEncoding
            )
        }
    }

    /// Sends `release` to `obj`; safe no-op when `obj` is null.
    unsafe fn objc_release(obj: Id) {
        if !obj.is_null() {
            unsafe {
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(obj, sel("release"));
            }
        }
    }

    /// Compiles `SHADER_SOURCE` and returns a compute pipeline state for the
    /// `divergence` kernel, releasing every intermediate on all paths.
    unsafe fn compile_shader(device: Id) -> Option<Id> {
        unsafe {
            let source = nsstring(SHADER_SOURCE);
            if source.is_null() {
                return None;
            }

            let sel_lib = sel("newLibraryWithSource:options:error:");
            let mut error: Id = std::ptr::null_mut();
            let library = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, Id, *mut Id) -> Id)(
                device,
                sel_lib,
                source,
                std::ptr::null_mut(),
                &mut error,
            );
            if library.is_null() {
                objc_release(error);
                objc_release(source);
                return None;
            }
            objc_release(source);

            let func_name = nsstring("divergence");
            let sel_func = sel("newFunctionWithName:");
            let function =
                msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id) -> Id)(library, sel_func, func_name);
            objc_release(func_name);
            if function.is_null() {
                objc_release(library);
                return None;
            }

            let sel_pipe = sel("newComputePipelineStateWithFunction:error:");
            let mut error2: Id = std::ptr::null_mut();
            let pipeline = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, *mut Id) -> Id)(
                device,
                sel_pipe,
                function,
                &mut error2,
            );

            objc_release(function);
            objc_release(library);

            if pipeline.is_null() {
                objc_release(error2);
                return None;
            }

            Some(pipeline)
        }
    }

    /// Allocates a device buffer of `size` bytes with options `0` (the
    /// default resource options).
    unsafe fn new_buffer(device: Id, size: u64) -> Id {
        unsafe {
            let sel_buf = sel("newBufferWithLength:options:");
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, u64, u64) -> Id)(device, sel_buf, size, 0)
        }
    }

    /// Binds `buffer` at `offset` to argument-table slot `index` on `encoder`.
    unsafe fn set_buffer(encoder: Id, buffer: Id, offset: u64, index: u64) {
        unsafe {
            let s = sel("setBuffer:offset:atIndex:");
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, u64, u64))(
                encoder, s, buffer, offset, index,
            );
        }
    }

    /// Dispatches a 1-D grid of `total` threads in groups of `per_group`.
    unsafe fn dispatch_threads_1d(encoder: Id, total: u32, per_group: u32) {
        /// ABI mirror of Metal's `MTLSize` (three NSUInteger fields).
        #[repr(C)]
        struct MTLSize {
            width: u64,
            height: u64,
            depth: u64,
        }

        let grid = MTLSize {
            width: total as u64,
            height: 1,
            depth: 1,
        };
        let group = MTLSize {
            width: per_group as u64,
            height: 1,
            depth: 1,
        };

        unsafe {
            let s = sel("dispatchThreads:threadsPerThreadgroup:");
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, MTLSize, MTLSize))(encoder, s, grid, group);
        }
    }

    impl Drop for MetalState {
        fn drop(&mut self) {
            // Release in reverse order of acquisition.
            unsafe {
                objc_release(self.output_buf);
                objc_release(self.counter_buf);
                objc_release(self.pipeline);
                objc_release(self.queue);
                objc_release(self._device);
            }
        }
    }
}
388
389impl EntropySource for GPUDivergenceSource {
390 fn info(&self) -> &SourceInfo {
391 &GPU_DIVERGENCE_INFO
392 }
393
394 fn is_available(&self) -> bool {
395 #[cfg(target_os = "macos")]
396 {
397 static METAL_AVAILABLE: OnceLock<bool> = OnceLock::new();
398 *METAL_AVAILABLE.get_or_init(|| metal::MetalState::new().is_some())
399 }
400 #[cfg(not(target_os = "macos"))]
401 {
402 false
403 }
404 }
405
406 fn collect(&self, n_samples: usize) -> Vec<u8> {
407 #[cfg(not(target_os = "macos"))]
408 {
409 let _ = n_samples;
410 Vec::new()
411 }
412
413 #[cfg(target_os = "macos")]
414 {
415 let state = match metal::MetalState::new() {
416 Some(s) => s,
417 None => return Vec::new(),
418 };
419
420 let raw_count = n_samples * 2 + 64;
421 let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
422 let mut gpu_entropy: Vec<u8> = Vec::with_capacity(raw_count);
423
424 for _ in 0..raw_count {
425 let t0 = mach_time();
426
427 let results = match state.dispatch() {
429 Some(r) => r,
430 None => continue,
431 };
432
433 let t1 = mach_time();
434 timings.push(t1.wrapping_sub(t0));
435
436 let mut gpu_hash: u64 = 0;
439 for (i, &val) in results.iter().enumerate() {
440 gpu_hash ^= (val as u64).rotate_left((i as u32) & 63);
441 }
442 gpu_entropy.push(xor_fold_u64(gpu_hash));
443 }
444
445 let timing_bytes = extract_timing_entropy(&timings, n_samples);
447
448 let mut output: Vec<u8> = Vec::with_capacity(n_samples);
451 for i in 0..n_samples.min(timing_bytes.len()).min(gpu_entropy.len()) {
452 output.push(timing_bytes[i] ^ gpu_entropy[i]);
453 }
454
455 output.truncate(n_samples);
456 output
457 }
458 }
459}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// The static metadata must match the declared source identity.
    #[test]
    fn info() {
        let source = GPUDivergenceSource;
        assert_eq!(source.name(), "gpu_divergence");
        assert_eq!(source.info().category, SourceCategory::GPU);
        assert!(!source.info().composite);
    }

    /// End-to-end collection; ignored by default because it needs real
    /// Metal-capable hardware.
    #[test]
    #[cfg(target_os = "macos")]
    #[ignore]
    fn collects_bytes() {
        let source = GPUDivergenceSource;
        if source.is_available() {
            let bytes = source.collect(64);
            assert!(!bytes.is_empty());
            assert!(bytes.len() <= 64);
            let distinct: std::collections::HashSet<u8> = bytes.iter().copied().collect();
            assert!(distinct.len() > 1, "Expected variation in collected bytes");
        }
    }
}