//
// RingKernel Metal Shading Language Template
// Kernel ID: {{KERNEL_ID}}
// Generated by ringkernel-codegen
//
#include <metal_stdlib>
#include <metal_atomic>
using namespace metal;
// Control block structure (128 bytes, aligned)
// Shared host<->GPU state; layout must match the host-side definition byte
// for byte. Fields sum to 104 bytes, so the tail reserve must be 24 bytes
// (the original [16] left sizeof == 120, contradicting the documented 128).
// NOTE(review): atomic_ulong (64-bit atomics) requires MSL 2.4+ and
// supporting hardware — confirm the deployment target for this template.
struct ControlBlock {
    atomic_uint is_active;             // 0 = parked; nonzero = kernel may run
    atomic_uint should_terminate;      // host requests shutdown by setting nonzero
    atomic_uint has_terminated;        // kernel acknowledges shutdown here
    uint _pad1;                        // keeps the following ulongs 8-byte aligned
    atomic_ulong messages_processed;   // incremented once per completed dispatch
    atomic_ulong messages_in_flight;
    atomic_ulong input_head;           // ring-buffer cursors for the input queue
    atomic_ulong input_tail;
    atomic_ulong output_head;          // ring-buffer cursors for the output queue
    atomic_ulong output_tail;
    uint input_capacity;               // queue sizes; the masks below suggest
    uint output_capacity;              //   power-of-two capacities — confirm on host
    uint input_mask;                   // presumably capacity - 1 for index wrapping
    uint output_mask;
    atomic_ulong hlc_physical;         // hybrid logical clock components
    atomic_ulong hlc_logical;
    atomic_uint last_error;
    atomic_uint error_count;
    uchar _reserved[24];               // tail padding: 104 used + 24 = 128 bytes
};
static_assert(sizeof(ControlBlock) == 128,
              "ControlBlock must be 128 bytes to match the host-side layout");
// Message header (256 bytes)
// Fixed-size envelope preceding every message payload in the ring queues;
// layout must match the host-side definition byte for byte. Fields sum to
// 128 bytes, so the tail reserve must be 128 bytes (the original [104]
// left sizeof == 232, contradicting the documented 256).
struct MessageHeader {
    ulong magic;                 // presumably a validity sentinel — confirm on host
    uint version;
    uint flags;
    ulong message_id;
    ulong correlation_id;        // links a response to its originating request
    ulong source_kernel;
    ulong dest_kernel;
    ulong message_type;
    uchar priority;
    uchar _reserved1[7];         // pads priority out to an 8-byte boundary
    ulong payload_size;          // bytes of payload following this header
    uint checksum;
    uint _reserved2;
    ulong ts_physical;           // HLC timestamp triple (physical/logical/node)
    ulong ts_logical;
    ulong ts_node_id;
    ulong deadline_physical;     // HLC deadline triple; semantics defined by host
    ulong deadline_logical;
    ulong deadline_node_id;
    uchar _reserved3[128];       // tail padding: 128 used + 128 = 256 bytes
};
static_assert(sizeof(MessageHeader) == 256,
              "MessageHeader must be 256 bytes to match the host-side layout");
// Thread context
// Per-invocation dispatch geometry, captured from the [[...]] attributes in
// the kernel signature so generated user code can consume a single struct
// instead of four loose parameters.
struct ThreadContext {
uint thread_id;           // thread_position_in_threadgroup
uint threadgroup_id;      // threadgroup_position_in_grid
uint threads_per_group;   // threads_per_threadgroup
uint threadgroups_count;  // threadgroups_per_grid
};
// User state type (customize as needed)
// Opaque 4 KiB scratch area bound at buffer(3); it persists across
// dispatches, and generated user code defines its own layout over `data`.
struct UserState {
uchar data[4096];
};
// Main kernel entry
//
// Each dispatch is one processing quantum: buffer(0) is the shared
// ControlBlock, buffers(1)/(2) are the raw input/output ring storage, and
// buffer(3) is the persistent user state.
//
// Fix: MSL atomic functions accept only memory_order_relaxed — the
// acquire/release/acq_rel orders used previously do not compile under the
// Metal compiler. Cross-boundary ordering comes from command-buffer
// completion and the explicit threadgroup_barrier fences below.
kernel void {{KERNEL_ID}}_main(
    device ControlBlock* control [[buffer(0)]],
    device uchar* input_queue [[buffer(1)]],
    device uchar* output_queue [[buffer(2)]],
    device UserState* user_state [[buffer(3)]],
    uint thread_id [[thread_position_in_threadgroup]],
    uint threadgroup_id [[threadgroup_position_in_grid]],
    uint threads_per_group [[threads_per_threadgroup]],
    uint threadgroups_count [[threadgroups_per_grid]]
) {
    // Package the dispatch geometry for the generated user code.
    ThreadContext ctx;
    ctx.thread_id = thread_id;
    ctx.threadgroup_id = threadgroup_id;
    ctx.threads_per_group = threads_per_group;
    ctx.threadgroups_count = threadgroups_count;

    // Inactive kernels bail out immediately. Every thread in the grid reads
    // the same control word, so these early returns are uniform and do not
    // create divergent paths around the barriers below.
    uint is_active = atomic_load_explicit(&control->is_active, memory_order_relaxed);
    if (is_active == 0u) {
        return;
    }

    // Honor a pending termination request before doing any work.
    uint should_term = atomic_load_explicit(&control->should_terminate, memory_order_relaxed);
    if (should_term != 0u) {
        if (thread_id == 0u && threadgroup_id == 0u) {
            // Acknowledge shutdown; the host observes this after the command
            // buffer completes, which orders the write for it.
            atomic_store_explicit(&control->has_terminated, 1u, memory_order_relaxed);
        }
        return;
    }

    // Make any threadgroup-local setup visible before user code runs.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // === User kernel code ===
    {{USER_CODE}}
    // === End user code ===

    // Flush device-memory writes made by user code before updating stats.
    threadgroup_barrier(mem_flags::mem_device);

    // A single designated thread bumps the per-dispatch statistics.
    if (thread_id == 0u && threadgroup_id == 0u) {
        atomic_fetch_add_explicit(&control->messages_processed, 1ul, memory_order_relaxed);
    }
}
// Helper functions
// Barrier wrappers for generated user code. NOTE(review): despite the
// "fence" names, these are full threadgroup execution barriers plus the
// named memory-fence scope — every thread in the threadgroup must reach
// the call (do not invoke them from divergent control flow).
inline void memory_fence_threadgroup() {
threadgroup_barrier(mem_flags::mem_threadgroup);
}
inline void memory_fence_device() {
threadgroup_barrier(mem_flags::mem_device);
}
// 64-bit atomic load/store/add helpers over device memory.
// Fix: MSL atomics accept only memory_order_relaxed — the previous
// acquire/release orders do not compile under the Metal compiler. Callers
// needing ordering must pair these with threadgroup_barrier fences.
inline ulong atomic_load_u64(device atomic_ulong* ptr) {
    return atomic_load_explicit(ptr, memory_order_relaxed);
}
inline void atomic_store_u64(device atomic_ulong* ptr, ulong value) {
    atomic_store_explicit(ptr, value, memory_order_relaxed);
}
// Returns the value held at *ptr before the addition.
inline ulong atomic_add_u64(device atomic_ulong* ptr, ulong value) {
    return atomic_fetch_add_explicit(ptr, value, memory_order_relaxed);
}
// Weak compare-and-swap on a 64-bit device atomic.
// On failure, *expected is overwritten with the observed value (standard
// compare_exchange semantics), so callers can retry in a loop; being the
// weak form, it may also fail spuriously.
// Fix: MSL permits only memory_order_relaxed for both the success and
// failure orders — acq_rel/acquire do not compile under the Metal compiler.
inline bool atomic_cas_u64(device atomic_ulong* ptr, thread ulong* expected, ulong desired) {
    return atomic_compare_exchange_weak_explicit(
        ptr, expected, desired,
        memory_order_relaxed, memory_order_relaxed
    );
}