use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
#[cfg(target_os = "macos")]
use crate::sources::helpers::extract_timing_entropy;
#[cfg(target_os = "macos")]
use crate::sources::helpers::mach_time;
#[cfg(target_os = "macos")]
use crate::sources::helpers::xor_fold_u64;
#[cfg(target_os = "macos")]
use std::sync::OnceLock;
static GPU_DIVERGENCE_INFO: SourceInfo = SourceInfo {
name: "gpu_divergence",
description: "GPU shader thread execution order divergence entropy",
physics: "Dispatches Metal compute shaders where parallel threads race to atomically \
increment a shared counter. The execution order captures GPU scheduling \
nondeterminism from: SIMD group divergence on conditional branches, memory \
coalescing failures, L2 cache bank conflicts, thermal-dependent GPU clock \
frequency variation, and SIMD-group scheduler arbitration. Each dispatch produces \
a different execution ordering due to physical nondeterminism in the GPU.",
category: SourceCategory::GPU,
platform: Platform::MacOS,
requirements: &[Requirement::Metal],
entropy_rate_estimate: 4.0,
composite: false,
is_fast: true,
};
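/// Entropy source backed by Metal compute-shader scheduling nondeterminism.
///
/// A minimal usage sketch (hypothetical caller code, assuming the crate's
/// `EntropySource` trait is in scope):
///
/// ```ignore
/// let src = GPUDivergenceSource;
/// if src.is_available() {
///     let bytes = src.collect(32);
///     assert!(bytes.len() <= 32);
/// }
/// ```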
pub struct GPUDivergenceSource;
#[cfg(target_os = "macos")]
mod metal {
use std::ffi::{CString, c_void};
type Id = *mut c_void;
type Sel = *mut c_void;
type Class = *mut c_void;
#[link(name = "objc", kind = "dylib")]
unsafe extern "C" {
fn objc_getClass(name: *const i8) -> Class;
fn sel_registerName(name: *const i8) -> Sel;
fn objc_msgSend(receiver: Id, sel: Sel, ...) -> Id;
}
#[link(name = "Metal", kind = "framework")]
unsafe extern "C" {
fn MTLCreateSystemDefaultDevice() -> Id;
}
const THREADS: u32 = 256;
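// The kernel below gives each thread a data-dependent (Collatz-style) branch
// sequence, then records the order in which threads reach the shared atomic counter.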
const SHADER_SOURCE: &str = r#"
#include <metal_stdlib>
using namespace metal;
kernel void divergence(
device atomic_uint *counter [[buffer(0)]],
device uint *output [[buffer(1)]],
uint tid [[thread_position_in_grid]]
) {
// Data-dependent work to create divergence.
uint val = tid;
for (uint i = 0; i < 16; i++) {
if (val & 1) { val = val * 3 + 1; }
else { val = val >> 1; }
}
// Atomic increment — order captures scheduling nondeterminism.
uint order = atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
output[tid] = order ^ val;
}
"#;
pub struct MetalState {
_device: Id,
queue: Id,
pipeline: Id,
counter_buf: Id,
output_buf: Id,
}
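// SAFETY (assumption): the raw Objective-C pointers are only ever used by one
// thread at a time in this module, so moving a MetalState across threads is sound.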
unsafe impl Send for MetalState {}
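/// Casts the variadic `objc_msgSend` to a concrete function-pointer type.
/// `objc_msgSend` must be cast to the actual method signature before it is
/// called; the transmute is the Rust spelling of that cast.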
macro_rules! msg_send_fn {
($ty:ty) => {
std::mem::transmute::<*const (), $ty>(objc_msgSend as *const ())
};
}
impl MetalState {
pub fn new() -> Option<Self> {
unsafe {
let device = MTLCreateSystemDefaultDevice();
if device.is_null() {
return None;
}
let queue = msg_send(device, "newCommandQueue");
if queue.is_null() {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(device, sel("release"));
return None;
}
let pipeline = match compile_shader(device) {
Some(p) => p,
None => {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(queue, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(device, sel("release"));
return None;
}
};
let counter_buf = new_buffer(device, 4);
let output_buf = new_buffer(device, THREADS as u64 * 4);
if counter_buf.is_null() || output_buf.is_null() {
if !output_buf.is_null() {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(output_buf, sel("release"));
}
if !counter_buf.is_null() {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(counter_buf, sel("release"));
}
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(pipeline, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(queue, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(device, sel("release"));
return None;
}
Some(MetalState {
_device: device,
queue,
pipeline,
counter_buf,
output_buf,
})
}
}
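/// Runs one compute dispatch: resets the shared counter, encodes the kernel,
/// blocks until the GPU finishes, and returns each thread's `order ^ val` word.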
pub fn dispatch(&self) -> Option<Vec<u32>> {
unsafe {
let counter_ptr = msg_send(self.counter_buf, "contents") as *mut u32;
if counter_ptr.is_null() {
return None;
}
*counter_ptr = 0;
let cmd_buf = msg_send(self.queue, "commandBuffer");
if cmd_buf.is_null() {
return None;
}
let encoder = msg_send(cmd_buf, "computeCommandEncoder");
if encoder.is_null() {
return None;
}
let sel_set_pipeline = sel("setComputePipelineState:");
msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id))(
encoder,
sel_set_pipeline,
self.pipeline,
);
set_buffer(encoder, self.counter_buf, 0, 0);
set_buffer(encoder, self.output_buf, 0, 1);
dispatch_threads_1d(encoder, THREADS, THREADS.min(256));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(encoder, sel("endEncoding"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("commit"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("waitUntilCompleted"));
let output_ptr = msg_send(self.output_buf, "contents") as *const u32;
if output_ptr.is_null() {
return None;
}
let mut result = vec![0u32; THREADS as usize];
std::ptr::copy_nonoverlapping(output_ptr, result.as_mut_ptr(), THREADS as usize);
Some(result)
}
}
}
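/// Looks up (registering it if needed) an Objective-C selector by name.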
unsafe fn sel(name: &str) -> Sel {
let c_name = CString::new(name).expect("selector contains null byte");
unsafe { sel_registerName(c_name.as_ptr()) }
}
unsafe fn msg_send(obj: Id, sel_name: &str) -> Id {
unsafe {
let s = sel(sel_name);
msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(obj, s)
}
}
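/// Builds an `NSString` from a Rust `&str` via `initWithBytes:length:encoding:`;
/// the encoding constant 4 is NSUTF8StringEncoding.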
unsafe fn nsstring(s: &str) -> Id {
unsafe {
let cls = objc_getClass(c"NSString".as_ptr());
let sel_alloc = sel("alloc");
let sel_init = sel("initWithBytes:length:encoding:");
let raw = msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(cls as Id, sel_alloc);
msg_send_fn!(unsafe extern "C" fn(Id, Sel, *const u8, u64, u64) -> Id)(
raw,
sel_init,
s.as_ptr(),
s.len() as u64,
4,
)
}
}
unsafe fn objc_release(obj: Id) {
if !obj.is_null() {
unsafe {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(obj, sel("release"));
}
}
}
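/// Compiles SHADER_SOURCE at runtime: source string -> MTLLibrary -> MTLFunction
/// -> MTLComputePipelineState, releasing intermediates and returning None on failure.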
unsafe fn compile_shader(device: Id) -> Option<Id> {
unsafe {
let source = nsstring(SHADER_SOURCE);
if source.is_null() {
return None;
}
let sel_lib = sel("newLibraryWithSource:options:error:");
let mut error: Id = std::ptr::null_mut();
let library = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, Id, *mut Id) -> Id)(
device,
sel_lib,
source,
std::ptr::null_mut(), &mut error,
);
if library.is_null() {
// `error` is returned via an out-parameter and is autoreleased per the Cocoa
// ownership convention, so it must not be released here.
objc_release(source);
return None;
}
objc_release(source);
let func_name = nsstring("divergence");
let sel_func = sel("newFunctionWithName:");
let function =
msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id) -> Id)(library, sel_func, func_name);
objc_release(func_name);
if function.is_null() {
objc_release(library);
return None;
}
let sel_pipe = sel("newComputePipelineStateWithFunction:error:");
let mut error2: Id = std::ptr::null_mut();
let pipeline = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, *mut Id) -> Id)(
device,
sel_pipe,
function,
&mut error2,
);
objc_release(function);
objc_release(library);
if pipeline.is_null() {
// As above, the out-parameter NSError is autoreleased and not released here.
return None;
}
Some(pipeline)
}
}
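/// Allocates a Metal buffer with default resource options (shared storage,
/// CPU-visible through `contents`).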
unsafe fn new_buffer(device: Id, size: u64) -> Id {
unsafe {
let sel_buf = sel("newBufferWithLength:options:");
msg_send_fn!(unsafe extern "C" fn(Id, Sel, u64, u64) -> Id)(device, sel_buf, size, 0)
}
}
unsafe fn set_buffer(encoder: Id, buffer: Id, offset: u64, index: u64) {
unsafe {
let s = sel("setBuffer:offset:atIndex:");
msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, u64, u64))(
encoder, s, buffer, offset, index,
);
}
}
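/// Encodes a 1-D `dispatchThreads:threadsPerThreadgroup:` call, passing the two
/// `MTLSize` structs to `objc_msgSend` by value.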
unsafe fn dispatch_threads_1d(encoder: Id, total: u32, per_group: u32) {
#[repr(C)]
struct MTLSize {
width: u64,
height: u64,
depth: u64,
}
let grid = MTLSize {
width: total as u64,
height: 1,
depth: 1,
};
let group = MTLSize {
width: per_group as u64,
height: 1,
depth: 1,
};
unsafe {
let s = sel("dispatchThreads:threadsPerThreadgroup:");
msg_send_fn!(unsafe extern "C" fn(Id, Sel, MTLSize, MTLSize))(encoder, s, grid, group);
}
}
impl Drop for MetalState {
fn drop(&mut self) {
unsafe {
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(self.output_buf, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(self.counter_buf, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(self.pipeline, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(self.queue, sel("release"));
msg_send_fn!(unsafe extern "C" fn(Id, Sel))(self._device, sel("release"));
}
}
}
}
impl EntropySource for GPUDivergenceSource {
fn info(&self) -> &SourceInfo {
&GPU_DIVERGENCE_INFO
}
fn is_available(&self) -> bool {
#[cfg(target_os = "macos")]
{
static METAL_AVAILABLE: OnceLock<bool> = OnceLock::new();
*METAL_AVAILABLE.get_or_init(|| metal::MetalState::new().is_some())
}
#[cfg(not(target_os = "macos"))]
{
false
}
}
fn collect(&self, n_samples: usize) -> Vec<u8> {
#[cfg(not(target_os = "macos"))]
{
let _ = n_samples;
Vec::new()
}
#[cfg(target_os = "macos")]
{
let state = match metal::MetalState::new() {
Some(s) => s,
None => return Vec::new(),
};
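// Oversample: dispatch more often than n_samples so that skipped (failed)
// dispatches and the timing extractor still leave enough raw material.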
let raw_count = n_samples * 2 + 64;
let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
let mut gpu_entropy: Vec<u8> = Vec::with_capacity(raw_count);
for _ in 0..raw_count {
let t0 = mach_time();
let results = match state.dispatch() {
Some(r) => r,
None => continue,
};
let t1 = mach_time();
timings.push(t1.wrapping_sub(t0));
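// Fold the per-thread execution-order words into one 64-bit value; rotating
// by the thread index preserves positional information in the mix.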
let mut gpu_hash: u64 = 0;
for (i, &val) in results.iter().enumerate() {
gpu_hash ^= (val as u64).rotate_left((i as u32) & 63);
}
gpu_entropy.push(xor_fold_u64(gpu_hash));
}
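// Combine two views of the same dispatches: CPU-side timing deltas (via
// extract_timing_entropy) XORed with the folded GPU execution-order bytes.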
let timing_bytes = extract_timing_entropy(&timings, n_samples);
let mut output: Vec<u8> = Vec::with_capacity(n_samples);
for i in 0..n_samples.min(timing_bytes.len()).min(gpu_entropy.len()) {
output.push(timing_bytes[i] ^ gpu_entropy[i]);
}
output.truncate(n_samples);
output
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn info() {
let src = GPUDivergenceSource;
assert_eq!(src.name(), "gpu_divergence");
assert_eq!(src.info().category, SourceCategory::GPU);
assert!(!src.info().composite);
}
#[test]
#[cfg(target_os = "macos")]
#[ignore = "requires a Metal-capable GPU"]
fn collects_bytes() {
let src = GPUDivergenceSource;
if src.is_available() {
let data = src.collect(64);
assert!(!data.is_empty());
assert!(data.len() <= 64);
let unique: std::collections::HashSet<u8> = data.iter().copied().collect();
assert!(unique.len() > 1, "Expected variation in collected bytes");
}
}
}