ringkernel-codegen 0.4.2

Code generation for RingKernel - generates GPU kernel source code
Documentation
//
// RingKernel Metal Shading Language Template
// Kernel ID: {{KERNEL_ID}}
// Generated by ringkernel-codegen
//

#include <metal_stdlib>
#include <metal_atomic>
using namespace metal;

// Control block structure (exactly 128 bytes, naturally aligned).
//
// Every field sits on its natural alignment, so the compiler inserts no
// implicit padding. Byte offsets are listed per group; the trailing reserve
// pads the struct out to the documented 128-byte size (the named fields end
// at byte 104).
//
// NOTE(review): the 64-bit counters use atomic_ulong; MSL support for 64-bit
// atomic operations is limited — confirm against the targeted Metal version
// and GPU family.
struct ControlBlock {
    // Lifecycle flags (offsets 0..15).
    atomic_uint is_active;          // non-zero: kernel entry proceeds to user code
    atomic_uint should_terminate;   // non-zero: kernel entry acknowledges and returns
    atomic_uint has_terminated;     // set to 1 by the kernel once termination is seen
    uint _pad1;                     // keeps the following 64-bit fields 8-byte aligned

    // Counters (offsets 16..31).
    atomic_ulong messages_processed;
    atomic_ulong messages_in_flight;

    // Queue cursors (offsets 32..63).
    atomic_ulong input_head;
    atomic_ulong input_tail;
    atomic_ulong output_head;
    atomic_ulong output_tail;

    // Queue geometry (offsets 64..79).
    // presumably mask == capacity - 1 for power-of-two capacities — TODO confirm
    uint input_capacity;
    uint output_capacity;
    uint input_mask;
    uint output_mask;

    // Hybrid logical clock state (offsets 80..95).
    atomic_ulong hlc_physical;
    atomic_ulong hlc_logical;

    // Error reporting (offsets 96..103).
    atomic_uint last_error;
    atomic_uint error_count;

    // Reserved (offsets 104..127): pads the struct to 128 bytes.
    uchar _reserved[24];
};

static_assert(sizeof(ControlBlock) == 128,
              "ControlBlock must be exactly 128 bytes");

// Message header (exactly 256 bytes).
//
// Every field sits on its natural alignment, so the compiler inserts no
// implicit padding. The named fields occupy bytes 0..127; the trailing
// reserve fills bytes 128..255 so that sizeof(MessageHeader) matches the
// documented 256-byte size and header arrays / pointer arithmetic step by
// the full 256 bytes.
struct MessageHeader {
    // Identification (offsets 0..15).
    ulong magic;
    uint version;
    uint flags;

    // Routing (offsets 16..55).
    ulong message_id;
    ulong correlation_id;
    ulong source_kernel;
    ulong dest_kernel;
    ulong message_type;

    // Priority byte plus explicit padding to the next 8-byte boundary
    // (offsets 56..63).
    uchar priority;
    uchar _reserved1[7];

    // Payload description (offsets 64..79).
    ulong payload_size;
    uint checksum;
    uint _reserved2;

    // Timestamp triple (offsets 80..103).
    ulong ts_physical;
    ulong ts_logical;
    ulong ts_node_id;

    // Deadline triple (offsets 104..127).
    ulong deadline_physical;
    ulong deadline_logical;
    ulong deadline_node_id;

    // Reserved (offsets 128..255): pads the header to 256 bytes.
    uchar _reserved3[128];
};

static_assert(sizeof(MessageHeader) == 256,
              "MessageHeader must be exactly 256 bytes");

// Per-thread launch coordinates, filled in by the kernel entry point from its
// built-in thread-position arguments. Four consecutive uint fields.
struct ThreadContext {
    uint thread_id,           // position of this thread within its threadgroup
         threadgroup_id,      // position of the threadgroup within the grid
         threads_per_group,   // threads in each threadgroup
         threadgroups_count;  // threadgroups in the grid
};

// User state type (customize as needed).
// Placeholder for kernel-specific state: an opaque 4096-byte blob that the
// kernel entry point receives as a device pointer.
struct UserState {
    uchar data[4096];  // raw storage; this template does not interpret it
};

// Main kernel entry.
//
// Buffers:
//   buffer(0) control      - shared ControlBlock (flags, counters, cursors)
//   buffer(1) input_queue  - raw bytes of the input queue
//   buffer(2) output_queue - raw bytes of the output queue
//   buffer(3) user_state   - kernel-specific state blob
//
// Flow: sample the control flags once per threadgroup, return early when the
// kernel is inactive or termination was requested, otherwise run the user
// code and bump the processed counter.
//
// The control flags are sampled by one thread and broadcast through
// threadgroup memory so that every thread of a threadgroup takes the same
// branch. Letting each thread load the flags itself would allow the threads
// of one threadgroup to disagree (the flags can change while the dispatch is
// running) and some of them to skip a threadgroup_barrier the others wait on.
//
// All atomics use memory_order_relaxed, the only order MSL atomic functions
// accept; cross-thread ordering comes from the barriers.
//
// NOTE(review): the helper functions defined after this kernel in the file
// are not visible to {{USER_CODE}} unless they are declared before it.
kernel void {{KERNEL_ID}}_main(
    device ControlBlock* control [[buffer(0)]],
    device uchar* input_queue [[buffer(1)]],
    device uchar* output_queue [[buffer(2)]],
    device UserState* user_state [[buffer(3)]],
    uint thread_id [[thread_position_in_threadgroup]],
    uint threadgroup_id [[threadgroup_position_in_grid]],
    uint threads_per_group [[threads_per_threadgroup]],
    uint threadgroups_count [[threadgroups_per_grid]]
) {
    // Build thread context
    ThreadContext ctx;
    ctx.thread_id = thread_id;
    ctx.threadgroup_id = threadgroup_id;
    ctx.threads_per_group = threads_per_group;
    ctx.threadgroups_count = threadgroups_count;

    // Sample the control flags once per threadgroup.
    threadgroup uint tg_is_active;
    threadgroup uint tg_should_terminate;
    if (thread_id == 0) {
        tg_is_active = atomic_load_explicit(&control->is_active, memory_order_relaxed);
        tg_should_terminate = atomic_load_explicit(&control->should_terminate, memory_order_relaxed);
    }

    // Publish the sampled flags to the whole threadgroup. Every thread
    // reaches this barrier, and it also serves as the pre-processing
    // threadgroup synchronization point.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Check if kernel should process (uniform across the threadgroup)
    uint is_active = tg_is_active;
    if (is_active == 0) {
        return;
    }

    // Check termination (uniform across the threadgroup)
    uint should_term = tg_should_terminate;
    if (should_term != 0) {
        // A single thread of the grid acknowledges the request.
        if (thread_id == 0 && threadgroup_id == 0) {
            atomic_store_explicit(&control->has_terminated, 1u, memory_order_relaxed);
        }
        return;
    }

    // === User kernel code ===
    {{USER_CODE}}
    // === End user code ===

    // Synchronize after processing
    threadgroup_barrier(mem_flags::mem_device);

    // Update statistics: one increment per dispatch, by a single thread.
    // NOTE(review): 64-bit atomic_fetch_add may not be available in MSL —
    // confirm against the targeted Metal version.
    if (thread_id == 0 && threadgroup_id == 0) {
        atomic_fetch_add_explicit(&control->messages_processed, 1UL, memory_order_relaxed);
    }
}

// Helper functions

// Issues threadgroup_barrier with the threadgroup memory flag.
// Despite the "fence" name this is a full barrier: every thread of the
// threadgroup must reach the call, so do not invoke it from a branch that
// only some of the threads take.
inline void memory_fence_threadgroup() {
    threadgroup_barrier(mem_flags::mem_threadgroup);
}

// Issues threadgroup_barrier with the device memory flag.
// Despite the "fence" name this is a full barrier: every thread of the
// threadgroup must reach the call. The barrier itself spans one threadgroup
// only, not the whole grid.
inline void memory_fence_device() {
    threadgroup_barrier(mem_flags::mem_device);
}

// Atomically reads the 64-bit value at `ptr` and returns it.
//
// Uses memory_order_relaxed, the only memory order MSL atomic functions
// accept; callers that need ordering against other memory accesses must
// pair this with a barrier.
//
// NOTE(review): MSL support for 64-bit atomic load is limited — confirm
// against the targeted Metal version and GPU family.
inline ulong atomic_load_u64(device atomic_ulong* ptr) {
    return atomic_load_explicit(ptr, memory_order_relaxed);
}

// Atomically writes `value` to the 64-bit location at `ptr`.
//
// Uses memory_order_relaxed, the only memory order MSL atomic functions
// accept; callers that need earlier writes to be visible before this store
// must issue a barrier first.
//
// NOTE(review): MSL support for 64-bit atomic store is limited — confirm
// against the targeted Metal version and GPU family.
inline void atomic_store_u64(device atomic_ulong* ptr, ulong value) {
    atomic_store_explicit(ptr, value, memory_order_relaxed);
}

// Atomically adds `value` to the 64-bit location at `ptr` (relaxed ordering)
// and returns the value the location held before the addition.
inline ulong atomic_add_u64(device atomic_ulong* ptr, ulong value) {
    const ulong previous = atomic_fetch_add_explicit(ptr, value, memory_order_relaxed);
    return previous;
}

// Weak 64-bit compare-and-swap.
//
// If the location at `ptr` holds `*expected`, it is replaced with `desired`
// and the function returns true. Otherwise `*expected` is overwritten with
// the value actually found and the function returns false. Being the weak
// form, it may also fail spuriously, so call it in a retry loop.
//
// Uses memory_order_relaxed for both the success and the failure case, the
// only memory order MSL atomic functions accept; pair with a barrier when
// ordering against other memory accesses is required.
//
// NOTE(review): MSL support for 64-bit compare-exchange is limited — confirm
// against the targeted Metal version and GPU family.
inline bool atomic_cas_u64(device atomic_ulong* ptr, thread ulong* expected, ulong desired) {
    return atomic_compare_exchange_weak_explicit(
        ptr, expected, desired,
        memory_order_relaxed, memory_order_relaxed
    );
}