vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
const FIFO_CAPACITY: u32 = 256u;
const FIFO_OP_ENQUEUE: u32 = 1u;
const FIFO_OP_DEQUEUE: u32 = 2u;
const FIFO_OP_LEN: u32 = 3u;
const FIFO_OP_IS_EMPTY: u32 = 4u;
const FIFO_OK: u32 = 0u;
const FIFO_OVERFLOW: u32 = 1u;
const FIFO_UNDERFLOW: u32 = 2u;

struct FifoCommand {
    op: u32,
    lane: u32,
    value: u32,
    reserved: u32,
}

struct FifoResult {
    status: u32,
    value: u32,
    len: u32,
    is_empty: u32,
}

@group(0) @binding(0) var<storage, read> fifo_commands: array<FifoCommand>;
@group(0) @binding(1) var<storage, read_write> fifo_results: array<FifoResult>;

var<workgroup> fifo_values: array<u32, 256>;
var<workgroup> fifo_head: atomic<u32>;
var<workgroup> fifo_tail: atomic<u32>;
var<workgroup> fifo_len: atomic<u32>;

@compute @workgroup_size(64, 1, 1)
fn workgroup_queue_fifo_kernel(@builtin(local_invocation_id) local_id: vec3<u32>) {
    let lane = local_id.x;
    if (lane == 0u) {
        atomicStore(&fifo_head, 0u);
        atomicStore(&fifo_tail, 0u);
        atomicStore(&fifo_len, 0u);
    }
    workgroupBarrier();

    var command_index = 0u;
    let command_count = arrayLength(&fifo_commands);
    loop {
        if (command_index >= command_count) {
            break;
        }

        let command = fifo_commands[command_index];
        if (command.lane == lane) {
            var status = FIFO_OK;
            var value = 0xffffffffu;
            var len_after = atomicLoad(&fifo_len);
            var empty_after = select(0u, 1u, len_after == 0u);

            if (command.op == FIFO_OP_ENQUEUE) {
                let reserved_len = atomicAdd(&fifo_len, 1u);
                if (reserved_len >= FIFO_CAPACITY) {
                    _ = atomicSub(&fifo_len, 1u);
                    status = FIFO_OVERFLOW;
                    len_after = FIFO_CAPACITY;
                    empty_after = 0u;
                } else {
                    let tail = atomicAdd(&fifo_tail, 1u);
                    fifo_values[tail % FIFO_CAPACITY] = command.value;
                    len_after = reserved_len + 1u;
                    empty_after = 0u;
                }
            } else if (command.op == FIFO_OP_DEQUEUE) {
                let old_len = atomicLoad(&fifo_len);
                if (old_len == 0u) {
                    status = FIFO_UNDERFLOW;
                    empty_after = 1u;
                } else {
                    let head = atomicAdd(&fifo_head, 1u);
                    value = fifo_values[head % FIFO_CAPACITY];
                    len_after = atomicSub(&fifo_len, 1u) - 1u;
                    empty_after = select(0u, 1u, len_after == 0u);
                }
            } else if (command.op == FIFO_OP_LEN) {
                len_after = atomicLoad(&fifo_len);
                empty_after = select(0u, 1u, len_after == 0u);
            } else if (command.op == FIFO_OP_IS_EMPTY) {
                len_after = atomicLoad(&fifo_len);
                empty_after = select(0u, 1u, len_after == 0u);
                value = empty_after;
            }

            fifo_results[command_index] = FifoResult(status, value, len_after, empty_after);
        }

        workgroupBarrier();
        command_index = command_index + 1u;
    }
}