vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
const STACK_CAPACITY: u32 = 256u;
const STACK_OP_PUSH: u32 = 1u;
const STACK_OP_POP: u32 = 2u;
const STACK_OP_PEEK: u32 = 3u;
const STACK_OP_LEN: u32 = 4u;
const STACK_OP_IS_EMPTY: u32 = 5u;
const STACK_OK: u32 = 0u;
const STACK_OVERFLOW: u32 = 1u;
const STACK_UNDERFLOW: u32 = 2u;

struct StackCommand {
    op: u32,
    lane: u32,
    value: u32,
    reserved: u32,
}

struct StackResult {
    status: u32,
    value: u32,
    len: u32,
    is_empty: u32,
}

@group(0) @binding(0) var<storage, read> stack_commands: array<StackCommand>;
@group(0) @binding(1) var<storage, read_write> stack_results: array<StackResult>;

var<workgroup> stack_values: array<u32, 256>;
var<workgroup> stack_len: atomic<u32>;

@compute @workgroup_size(64, 1, 1)
fn workgroup_stack_kernel(@builtin(local_invocation_id) local_id: vec3<u32>) {
    let lane = local_id.x;
    if (lane == 0u) {
        atomicStore(&stack_len, 0u);
    }
    workgroupBarrier();

    var command_index = 0u;
    let command_count = arrayLength(&stack_commands);
    loop {
        if (command_index >= command_count) {
            break;
        }

        let command = stack_commands[command_index];
        if (command.lane == lane) {
            var status = STACK_OK;
            var value = 0xffffffffu;
            var len_after = atomicLoad(&stack_len);
            var empty_after = select(0u, 1u, len_after == 0u);

            if (command.op == STACK_OP_PUSH) {
                let slot = atomicAdd(&stack_len, 1u);
                if (slot >= STACK_CAPACITY) {
                    _ = atomicSub(&stack_len, 1u);
                    status = STACK_OVERFLOW;
                    len_after = STACK_CAPACITY;
                    empty_after = 0u;
                } else {
                    stack_values[slot] = command.value;
                    len_after = slot + 1u;
                    empty_after = 0u;
                }
            } else if (command.op == STACK_OP_POP) {
                let old_len = atomicLoad(&stack_len);
                if (old_len == 0u) {
                    status = STACK_UNDERFLOW;
                    empty_after = 1u;
                } else {
                    let slot = atomicSub(&stack_len, 1u) - 1u;
                    value = stack_values[slot];
                    len_after = slot;
                    empty_after = select(0u, 1u, slot == 0u);
                }
            } else if (command.op == STACK_OP_PEEK) {
                let current_len = atomicLoad(&stack_len);
                if (current_len == 0u) {
                    status = STACK_UNDERFLOW;
                    empty_after = 1u;
                } else {
                    value = stack_values[current_len - 1u];
                    len_after = current_len;
                    empty_after = 0u;
                }
            } else if (command.op == STACK_OP_LEN) {
                len_after = atomicLoad(&stack_len);
                empty_after = select(0u, 1u, len_after == 0u);
            } else if (command.op == STACK_OP_IS_EMPTY) {
                len_after = atomicLoad(&stack_len);
                empty_after = select(0u, 1u, len_after == 0u);
                value = empty_after;
            }

            stack_results[command_index] = StackResult(status, value, len_after, empty_after);
        }

        workgroupBarrier();
        command_index = command_index + 1u;
    }
}