//
// RingKernel WebGPU Shading Language Template
// Kernel ID: {{KERNEL_ID}}
// Generated by ringkernel-codegen
//
// Control block structure
// Note: WGSL lacks 64-bit atomics, so we split u64 values into lo/hi u32 pairs
struct ControlBlock {
is_active: atomic<u32>,
should_terminate: atomic<u32>,
has_terminated: atomic<u32>,
_pad1: u32,
// 64-bit values split into u32 pairs
messages_processed_lo: atomic<u32>,
messages_processed_hi: atomic<u32>,
messages_in_flight_lo: atomic<u32>,
messages_in_flight_hi: atomic<u32>,
input_head_lo: atomic<u32>,
input_head_hi: atomic<u32>,
input_tail_lo: atomic<u32>,
input_tail_hi: atomic<u32>,
output_head_lo: atomic<u32>,
output_head_hi: atomic<u32>,
output_tail_lo: atomic<u32>,
output_tail_hi: atomic<u32>,
input_capacity: u32,
output_capacity: u32,
input_mask: u32,
output_mask: u32,
hlc_physical_lo: atomic<u32>,
hlc_physical_hi: atomic<u32>,
hlc_logical_lo: atomic<u32>,
hlc_logical_hi: atomic<u32>,
last_error: atomic<u32>,
error_count: atomic<u32>,
}
// Message header structure (simplified for WGSL)
struct MessageHeader {
magic_lo: u32,
magic_hi: u32,
version: u32,
flags: u32,
message_id_lo: u32,
message_id_hi: u32,
correlation_id_lo: u32,
correlation_id_hi: u32,
source_kernel_lo: u32,
source_kernel_hi: u32,
dest_kernel_lo: u32,
dest_kernel_hi: u32,
message_type_lo: u32,
message_type_hi: u32,
priority: u32,
payload_size_lo: u32,
payload_size_hi: u32,
checksum: u32,
}
// Bindings
@group(0) @binding(0) var<storage, read_write> control: ControlBlock;
@group(0) @binding(1) var<storage, read_write> input_queue: array<u32>;
@group(0) @binding(2) var<storage, read_write> output_queue: array<u32>;
@group(0) @binding(3) var<storage, read_write> user_state: array<u32>;
// Thread context
struct ThreadContext {
thread_id: u32,
workgroup_id: u32,
global_id: u32,
num_workgroups: u32,
}
// Build thread context
var<private> ctx: ThreadContext;
@compute @workgroup_size(256)
fn {{KERNEL_ID}}_main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(workgroup_id) workgroup_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(num_workgroups) num_workgroups: vec3<u32>
) {
// Initialize thread context
ctx.thread_id = local_id.x;
ctx.workgroup_id = workgroup_id.x;
ctx.global_id = global_id.x;
ctx.num_workgroups = num_workgroups.x;
// Check if kernel should process
let is_active = atomicLoad(&control.is_active);
if (is_active == 0u) {
return;
}
// Check termination
let should_term = atomicLoad(&control.should_terminate);
if (should_term != 0u) {
if (ctx.thread_id == 0u && ctx.workgroup_id == 0u) {
atomicStore(&control.has_terminated, 1u);
}
return;
}
// Synchronize workgroup
workgroupBarrier();
// === User kernel code ===
{{USER_CODE}}
// === End user code ===
// Synchronize after processing
storageBarrier();
// Update statistics (simplified - increment lower 32 bits only)
if (ctx.thread_id == 0u && ctx.workgroup_id == 0u) {
let old_lo = atomicAdd(&control.messages_processed_lo, 1u);
// Handle overflow to high bits
if (old_lo == 0xFFFFFFFFu) {
atomicAdd(&control.messages_processed_hi, 1u);
}
}
}
// Helper functions
fn read_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>) -> vec2<u32> {
let lo_val = atomicLoad(lo);
let hi_val = atomicLoad(hi);
return vec2<u32>(lo_val, hi_val);
}
fn write_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>, value: vec2<u32>) {
atomicStore(lo, value.x);
atomicStore(hi, value.y);
}
fn add_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>, addend: u32) {
let old_lo = atomicAdd(lo, addend);
// Check for overflow
if (old_lo > 0xFFFFFFFFu - addend) {
atomicAdd(hi, 1u);
}
}
fn compare_u64(a: vec2<u32>, b: vec2<u32>) -> i32 {
if (a.y > b.y) { return 1; }
if (a.y < b.y) { return -1; }
if (a.x > b.x) { return 1; }
if (a.x < b.x) { return -1; }
return 0;
}