
openentropy_core/sources/frontier/gpu_divergence.rs

//! GPU shader thread divergence — intra-warp nondeterminism entropy.
//!
//! GPU threads (SIMD groups) should execute in lockstep but in practice
//! don't, due to:
//! - Warp divergence from conditional branches
//! - Memory coalescing failures
//! - Thermal effects on GPU clock frequency
//! - L2 cache bank conflicts
//!
//! We dispatch a Metal compute shader whose threads race to atomically
//! increment a counter. The resulting execution order captures GPU scheduling
//! nondeterminism, making this a genuinely novel entropy source.
//!
//! Uses direct Metal framework FFI via the Objective-C runtime — no external
//! process spawning. Each dispatch completes in microseconds.

use crate::source::{EntropySource, Platform, Requirement, SourceCategory, SourceInfo};
#[cfg(target_os = "macos")]
use crate::sources::helpers::{extract_timing_entropy, mach_time, xor_fold_u64};

static GPU_DIVERGENCE_INFO: SourceInfo = SourceInfo {
    name: "gpu_divergence",
    description: "GPU shader thread execution order divergence entropy",
    physics: "Dispatches Metal compute shaders where parallel threads race to atomically \
              increment a shared counter. The execution order captures GPU scheduling \
              nondeterminism from: SIMD group divergence on conditional branches, memory \
              coalescing failures, L2 cache bank conflicts, thermal-dependent GPU clock \
              frequency variation, and warp scheduler arbitration. Each dispatch produces \
              a different execution ordering due to physical nondeterminism in the GPU.",
    category: SourceCategory::GPU,
    platform: Platform::MacOS,
    requirements: &[Requirement::Metal],
    entropy_rate_estimate: 6000.0,
    composite: false,
};

/// Entropy source that harvests thread execution order divergence from Metal GPU.
pub struct GPUDivergenceSource;

/// Metal framework FFI via the Objective-C runtime (macOS only).
#[cfg(target_os = "macos")]
mod metal {
    use std::ffi::{CString, c_void};

    // Objective-C runtime types.
    type Id = *mut c_void;
    type Sel = *mut c_void;
    type Class = *mut c_void;

    #[link(name = "objc", kind = "dylib")]
    unsafe extern "C" {
        fn objc_getClass(name: *const i8) -> Class;
        fn sel_registerName(name: *const i8) -> Sel;
        fn objc_msgSend(receiver: Id, sel: Sel, ...) -> Id;
    }

    // Metal framework link — ensures the framework is loaded.
    #[link(name = "Metal", kind = "framework")]
    unsafe extern "C" {
        fn MTLCreateSystemDefaultDevice() -> Id;
    }

    /// Number of GPU threads per dispatch.
    const THREADS: u32 = 256;

    /// Metal shader source: threads race to atomically increment a counter.
    /// The `order` output captures the nondeterministic execution ordering.
    const SHADER_SOURCE: &str = r#"
#include <metal_stdlib>
using namespace metal;
kernel void divergence(
    device atomic_uint *counter [[buffer(0)]],
    device uint *output [[buffer(1)]],
    uint tid [[thread_position_in_grid]]
) {
    // Data-dependent work to create divergence.
    uint val = tid;
    for (uint i = 0; i < 16; i++) {
        if (val & 1) { val = val * 3 + 1; }
        else { val = val >> 1; }
    }
    // Atomic increment — order captures scheduling nondeterminism.
    uint order = atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
    output[tid] = order ^ val;
}
"#;

    /// Opaque handle to a reusable Metal pipeline + buffers.
    pub struct MetalState {
        _device: Id,
        queue: Id,
        pipeline: Id,
        counter_buf: Id,
        output_buf: Id,
    }

    // SAFETY: Metal objects are reference-counted and thread-safe.
    // We only use them from a single thread within `collect()`.
    unsafe impl Send for MetalState {}

    /// Cast `objc_msgSend` to a concrete function pointer type.
    ///
    /// We must go through a raw pointer cast because `objc_msgSend` is a
    /// variadic extern fn; its function item is a zero-sized type that cannot
    /// be transmuted directly into a typed function pointer.
    macro_rules! msg_send_fn {
        ($ty:ty) => {
            std::mem::transmute::<*const (), $ty>(objc_msgSend as *const ())
        };
    }
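
    // Example: `commandBuffer` is an (Id, Sel) -> Id message, so the cast is:
    //
    //     let f = msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id);
    //     let cmd_buf = f(queue, sel("commandBuffer"));
    //
    // The `msg_send` helper below wraps exactly this pattern.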

    impl MetalState {
        /// Try to initialize Metal device, compile shader, create buffers.
        pub fn new() -> Option<Self> {
            unsafe {
                // SAFETY: MTLCreateSystemDefaultDevice returns a retained Metal device
                // object or null if no GPU is available.
                let device = MTLCreateSystemDefaultDevice();
                if device.is_null() {
                    return None;
                }

                let queue = msg_send(device, "newCommandQueue");
                if queue.is_null() {
                    return None;
                }

                let pipeline = compile_shader(device)?;

                let counter_buf = new_buffer(device, 4); // 1 x uint32
                let output_buf = new_buffer(device, THREADS as u64 * 4); // THREADS x uint32
                if counter_buf.is_null() || output_buf.is_null() {
                    return None;
                }

                Some(MetalState {
                    _device: device,
                    queue,
                    pipeline,
                    counter_buf,
                    output_buf,
                })
            }
        }

        /// Dispatch one compute pass and return the output buffer contents.
        pub fn dispatch(&self) -> Option<Vec<u32>> {
            unsafe {
                // Zero the counter.
                // SAFETY: counter_buf is a shared MTLBuffer we created. `contents` returns
                // a valid pointer to the buffer's CPU-accessible memory.
                let counter_ptr = msg_send(self.counter_buf, "contents") as *mut u32;
                if counter_ptr.is_null() {
                    return None;
                }
                *counter_ptr = 0;

                let cmd_buf = msg_send(self.queue, "commandBuffer");
                if cmd_buf.is_null() {
                    return None;
                }

                let encoder = msg_send(cmd_buf, "computeCommandEncoder");
                if encoder.is_null() {
                    return None;
                }

                // encoder.setComputePipelineState_(pipeline)
                let sel_set_pipeline = sel("setComputePipelineState:");
                msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id))(
                    encoder,
                    sel_set_pipeline,
                    self.pipeline,
                );

                // encoder.setBuffer_offset_atIndex_(counter_buf, 0, 0)
                set_buffer(encoder, self.counter_buf, 0, 0);
                // encoder.setBuffer_offset_atIndex_(output_buf, 0, 1)
                set_buffer(encoder, self.output_buf, 0, 1);

                dispatch_threads_1d(encoder, THREADS, THREADS.min(256));

                // End encoding, commit, wait.
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(encoder, sel("endEncoding"));
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("commit"));
                msg_send_fn!(unsafe extern "C" fn(Id, Sel))(cmd_buf, sel("waitUntilCompleted"));

                // Read output.
                let output_ptr = msg_send(self.output_buf, "contents") as *const u32;
                if output_ptr.is_null() {
                    return None;
                }
                let mut result = vec![0u32; THREADS as usize];
                std::ptr::copy_nonoverlapping(output_ptr, result.as_mut_ptr(), THREADS as usize);
                Some(result)
            }
        }
    }
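
    // Typical use of this state (a sketch; the real loop lives in `collect()`
    // at the bottom of this file):
    //
    //     if let Some(state) = MetalState::new() {
    //         let orders = state.dispatch().unwrap_or_default();
    //         // orders[tid] == (arrival order at the atomic counter) ^ (Collatz-mixed tid)
    //     }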

    // -----------------------------------------------------------------------
    // Objective-C runtime helpers
    // -----------------------------------------------------------------------

    unsafe fn sel(name: &str) -> Sel {
        let c_name = CString::new(name).expect("selector contains null byte");
        unsafe { sel_registerName(c_name.as_ptr()) }
    }

    unsafe fn msg_send(obj: Id, sel_name: &str) -> Id {
        unsafe {
            let s = sel(sel_name);
            msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(obj, s)
        }
    }

    /// Create an NSString from a Rust &str.
    unsafe fn nsstring(s: &str) -> Id {
        unsafe {
            let cls = objc_getClass(c"NSString".as_ptr());
            let sel_alloc = sel("alloc");
            let sel_init = sel("initWithBytes:length:encoding:");

            let raw = msg_send_fn!(unsafe extern "C" fn(Id, Sel) -> Id)(cls as Id, sel_alloc);
            // NSUTF8StringEncoding = 4
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, *const u8, u64, u64) -> Id)(
                raw,
                sel_init,
                s.as_ptr(),
                s.len() as u64,
                4,
            )
        }
    }

    /// Compile the Metal shader source and return a compute pipeline state.
    unsafe fn compile_shader(device: Id) -> Option<Id> {
        unsafe {
            let source = nsstring(SHADER_SOURCE);
            if source.is_null() {
                return None;
            }

            // device.newLibraryWithSource:options:error:
            let sel_lib = sel("newLibraryWithSource:options:error:");
            let mut error: Id = std::ptr::null_mut();
            let library = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, Id, *mut Id) -> Id)(
                device,
                sel_lib,
                source,
                std::ptr::null_mut(), // default options
                &mut error,
            );
            if library.is_null() {
                return None;
            }

            // library.newFunctionWithName:("divergence")
            let func_name = nsstring("divergence");
            let sel_func = sel("newFunctionWithName:");
            let function =
                msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id) -> Id)(library, sel_func, func_name);
            if function.is_null() {
                return None;
            }

            // device.newComputePipelineStateWithFunction:error:
            let sel_pipe = sel("newComputePipelineStateWithFunction:error:");
            let mut error2: Id = std::ptr::null_mut();
            let pipeline = msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, *mut Id) -> Id)(
                device,
                sel_pipe,
                function,
                &mut error2,
            );
            if pipeline.is_null() {
                return None;
            }

            Some(pipeline)
        }
    }

    /// Create a shared MTLBuffer of given size.
    unsafe fn new_buffer(device: Id, size: u64) -> Id {
        unsafe {
            let sel_buf = sel("newBufferWithLength:options:");
            // MTLResourceStorageModeShared = 0
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, u64, u64) -> Id)(device, sel_buf, size, 0)
        }
    }

    /// Set a buffer on a compute command encoder.
    unsafe fn set_buffer(encoder: Id, buffer: Id, offset: u64, index: u64) {
        unsafe {
            let s = sel("setBuffer:offset:atIndex:");
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, Id, u64, u64))(
                encoder, s, buffer, offset, index,
            );
        }
    }

    /// Dispatch 1D threads on a compute command encoder.
    unsafe fn dispatch_threads_1d(encoder: Id, total: u32, per_group: u32) {
        // MTLSize is a struct of 3 x NSUInteger (u64 on 64-bit).
        #[repr(C)]
        struct MTLSize {
            width: u64,
            height: u64,
            depth: u64,
        }

        let grid = MTLSize {
            width: total as u64,
            height: 1,
            depth: 1,
        };
        let group = MTLSize {
            width: per_group as u64,
            height: 1,
            depth: 1,
        };

        unsafe {
            let s = sel("dispatchThreads:threadsPerThreadgroup:");
            msg_send_fn!(unsafe extern "C" fn(Id, Sel, MTLSize, MTLSize))(encoder, s, grid, group);
        }
    }
}

impl EntropySource for GPUDivergenceSource {
    fn info(&self) -> &SourceInfo {
        &GPU_DIVERGENCE_INFO
    }

    fn is_available(&self) -> bool {
        #[cfg(target_os = "macos")]
        {
            metal::MetalState::new().is_some()
        }
        #[cfg(not(target_os = "macos"))]
        {
            false
        }
    }

    fn collect(&self, n_samples: usize) -> Vec<u8> {
        #[cfg(not(target_os = "macos"))]
        {
            let _ = n_samples;
            Vec::new()
        }

        #[cfg(target_os = "macos")]
        {
            let state = match metal::MetalState::new() {
                Some(s) => s,
                None => return Vec::new(),
            };

            // Oversample: gather more raw dispatches than output bytes so the
            // timing extractor has headroom.
            let raw_count = n_samples * 4 + 64;
            let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
            let mut gpu_entropy: Vec<u8> = Vec::with_capacity(raw_count);

            for _ in 0..raw_count {
                let t0 = mach_time();

                // GPU dispatch crosses CPU→GPU→CPU clock domains.
                let results = match state.dispatch() {
                    Some(r) => r,
                    None => continue,
                };

                let t1 = mach_time();
                timings.push(t1.wrapping_sub(t0));

                // XOR-fold all thread execution orders into one byte.
                // This captures GPU scheduling nondeterminism directly.
                let mut gpu_hash: u64 = 0;
                for (i, &val) in results.iter().enumerate() {
                    gpu_hash ^= (val as u64).rotate_left((i as u32) & 63);
                }
                gpu_entropy.push(xor_fold_u64(gpu_hash));
            }

            // Extract timing entropy from dispatch latencies.
            let timing_bytes = extract_timing_entropy(&timings, n_samples);

            // XOR GPU execution-order entropy with dispatch-timing entropy.
            // The two streams capture different mechanisms (on-GPU scheduling
            // order vs. CPU-side dispatch latency).
            let mut output: Vec<u8> = Vec::with_capacity(n_samples);
            for i in 0..n_samples.min(timing_bytes.len()).min(gpu_entropy.len()) {
                output.push(timing_bytes[i] ^ gpu_entropy[i]);
            }

            output.truncate(n_samples);
            output
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn info() {
        let src = GPUDivergenceSource;
        assert_eq!(src.name(), "gpu_divergence");
        assert_eq!(src.info().category, SourceCategory::GPU);
        assert!(!src.info().composite);
    }

    #[test]
    #[cfg(target_os = "macos")]
    #[ignore] // Requires GPU
    fn collects_bytes() {
        let src = GPUDivergenceSource;
        if src.is_available() {
            let data = src.collect(64);
            assert!(!data.is_empty());
            assert!(data.len() <= 64);
            let unique: std::collections::HashSet<u8> = data.iter().copied().collect();
            assert!(unique.len() > 1, "Expected variation in collected bytes");
        }
    }
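
    /// A further sanity check (a sketch; assumes the private `metal` module
    /// remains reachable from this test module): two back-to-back dispatches
    /// of 256 racing threads should essentially never record identical
    /// execution orderings.
    #[test]
    #[cfg(target_os = "macos")]
    #[ignore] // Requires GPU
    fn dispatch_orderings_vary() {
        if let Some(state) = metal::MetalState::new() {
            let a = state.dispatch().expect("first dispatch failed");
            let b = state.dispatch().expect("second dispatch failed");
            // Identical orderings would point to a deterministic scheduler,
            // undermining this source's entropy claim.
            assert_ne!(a, b, "expected differing execution orderings");
        }
    }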
}