ringkernel-codegen 1.1.0

Code generation for RingKernel - generates GPU kernel source code
Documentation
//
// RingKernel WebGPU Shading Language Template
// Kernel ID: {{KERNEL_ID}}
// Generated by ringkernel-codegen
//

// Control block structure
// Note: WGSL lacks 64-bit atomics, so we split u64 values into lo/hi u32 pairs
struct ControlBlock {
    is_active: atomic<u32>,
    should_terminate: atomic<u32>,
    has_terminated: atomic<u32>,
    _pad1: u32,

    // 64-bit values split into u32 pairs
    messages_processed_lo: atomic<u32>,
    messages_processed_hi: atomic<u32>,
    messages_in_flight_lo: atomic<u32>,
    messages_in_flight_hi: atomic<u32>,

    input_head_lo: atomic<u32>,
    input_head_hi: atomic<u32>,
    input_tail_lo: atomic<u32>,
    input_tail_hi: atomic<u32>,
    output_head_lo: atomic<u32>,
    output_head_hi: atomic<u32>,
    output_tail_lo: atomic<u32>,
    output_tail_hi: atomic<u32>,

    input_capacity: u32,
    output_capacity: u32,
    input_mask: u32,
    output_mask: u32,

    hlc_physical_lo: atomic<u32>,
    hlc_physical_hi: atomic<u32>,
    hlc_logical_lo: atomic<u32>,
    hlc_logical_hi: atomic<u32>,

    last_error: atomic<u32>,
    error_count: atomic<u32>,
}

// Message header structure (simplified for WGSL)
struct MessageHeader {
    magic_lo: u32,
    magic_hi: u32,
    version: u32,
    flags: u32,
    message_id_lo: u32,
    message_id_hi: u32,
    correlation_id_lo: u32,
    correlation_id_hi: u32,
    source_kernel_lo: u32,
    source_kernel_hi: u32,
    dest_kernel_lo: u32,
    dest_kernel_hi: u32,
    message_type_lo: u32,
    message_type_hi: u32,
    priority: u32,
    payload_size_lo: u32,
    payload_size_hi: u32,
    checksum: u32,
}

// Bindings
@group(0) @binding(0) var<storage, read_write> control: ControlBlock;
@group(0) @binding(1) var<storage, read_write> input_queue: array<u32>;
@group(0) @binding(2) var<storage, read_write> output_queue: array<u32>;
@group(0) @binding(3) var<storage, read_write> user_state: array<u32>;

// Thread context
struct ThreadContext {
    thread_id: u32,
    workgroup_id: u32,
    global_id: u32,
    num_workgroups: u32,
}

// Build thread context
var<private> ctx: ThreadContext;

@compute @workgroup_size(256)
fn {{KERNEL_ID}}_main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(workgroup_id) workgroup_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(num_workgroups) num_workgroups: vec3<u32>
) {
    // Initialize thread context
    ctx.thread_id = local_id.x;
    ctx.workgroup_id = workgroup_id.x;
    ctx.global_id = global_id.x;
    ctx.num_workgroups = num_workgroups.x;

    // Check if kernel should process
    let is_active = atomicLoad(&control.is_active);
    if (is_active == 0u) {
        return;
    }

    // Check termination
    let should_term = atomicLoad(&control.should_terminate);
    if (should_term != 0u) {
        if (ctx.thread_id == 0u && ctx.workgroup_id == 0u) {
            atomicStore(&control.has_terminated, 1u);
        }
        return;
    }

    // Synchronize workgroup
    workgroupBarrier();

    // === User kernel code ===
    {{USER_CODE}}
    // === End user code ===

    // Synchronize after processing
    storageBarrier();

    // Update statistics (simplified - increment lower 32 bits only)
    if (ctx.thread_id == 0u && ctx.workgroup_id == 0u) {
        let old_lo = atomicAdd(&control.messages_processed_lo, 1u);
        // Handle overflow to high bits
        if (old_lo == 0xFFFFFFFFu) {
            atomicAdd(&control.messages_processed_hi, 1u);
        }
    }
}

// Helper functions

fn read_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>) -> vec2<u32> {
    let lo_val = atomicLoad(lo);
    let hi_val = atomicLoad(hi);
    return vec2<u32>(lo_val, hi_val);
}

fn write_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>, value: vec2<u32>) {
    atomicStore(lo, value.x);
    atomicStore(hi, value.y);
}

fn add_u64(lo: ptr<storage, atomic<u32>, read_write>, hi: ptr<storage, atomic<u32>, read_write>, addend: u32) {
    let old_lo = atomicAdd(lo, addend);
    // Check for overflow
    if (old_lo > 0xFFFFFFFFu - addend) {
        atomicAdd(hi, 1u);
    }
}

fn compare_u64(a: vec2<u32>, b: vec2<u32>) -> i32 {
    if (a.y > b.y) { return 1; }
    if (a.y < b.y) { return -1; }
    if (a.x > b.x) { return 1; }
    if (a.x < b.x) { return -1; }
    return 0;
}