nightshade 0.24.0

A cross-platform data-oriented game engine.
Documentation
// One indexed indirect draw record as stored in the `commands` buffer.
// This shader only inspects `instance_count` (a value of 0 marks the record
// as droppable); every other field is copied through unchanged.
// NOTE(review): the field order presumably mirrors the host-side indexed
// indirect draw argument layout (five 32-bit words) — confirm against the
// code that fills and consumes this buffer.
struct DrawIndexedIndirect {
    index_count: u32,
    // Records with instance_count == 0 are removed by the compaction pass.
    instance_count: u32,
    first_index: u32,
    base_vertex: i32,
    first_instance: u32,
};

// Uniform parameters for the compaction pass.
struct CompactParams {
    // Number of valid entries in `class_ranges` / `counts`. Workgroups whose
    // id is >= this value exit immediately.
    class_count: u32,
    // Unused by the shader. Presumably padding so the struct occupies 16 bytes
    // for uniform-buffer layout — TODO confirm against the host-side struct.
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
};

// Indirect draw records, compacted in place: the shader both reads from and
// writes to this buffer.
@group(0) @binding(0)
var<storage, read_write> commands: array<DrawIndexedIndirect>;

// Output: counts[class] receives the number of records kept for that class.
@group(0) @binding(1)
var<storage, read_write> counts: array<u32>;

// Per-class slice of `commands`: x = index of the first record,
// y = number of records in the slice.
@group(0) @binding(2)
var<storage, read> class_ranges: array<vec2<u32>>;

// Pass parameters (see CompactParams).
@group(0) @binding(3)
var<uniform> params: CompactParams;

// Number of invocations per workgroup. Must stay equal to the literal 256 used
// for the `prefix` array length and in `@workgroup_size` on `main`.
const WORKGROUP_SIZE: u32 = 256u;

// Scratch for the per-chunk prefix sum: one slot per invocation.
var<workgroup> prefix: array<u32, 256>;
// Number of records already kept in earlier chunks of the current class.
// Written only by invocation 0, read by every invocation.
var<workgroup> running_base: u32;

// Stable in-place compaction of indirect draw records, one workgroup per class.
//
// For the class selected by `workgroup_id.x`, the slice
// `commands[start .. start + len)` (taken from `class_ranges`) is scanned in
// chunks of WORKGROUP_SIZE records. Records with `instance_count > 0` are moved
// to the front of the slice, keeping their relative order, and the number of
// surviving records is written to `counts[class_index]`.
//
// Slots at or beyond the surviving count inside the slice are not rewritten and
// keep whatever they held before the pass.
@compute @workgroup_size(256)
fn main(
    @builtin(workgroup_id) workgroup_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    // The whole workgroup takes the same branch here (workgroup id and uniform
    // data only), so the barriers below stay in uniform control flow.
    let class_index = workgroup_id.x;
    if class_index >= params.class_count {
        return;
    }

    let range = class_ranges[class_index];
    let start = range.x;
    let len = range.y;
    let thread = local_id.x;

    if thread == 0u {
        running_base = 0u;
    }
    workgroupBarrier();

    // Ceil-divide so a partially filled last chunk is still processed.
    let chunk_count = (len + WORKGROUP_SIZE - 1u) / WORKGROUP_SIZE;
    for (var chunk = 0u; chunk < chunk_count; chunk = chunk + 1u) {
        let index = chunk * WORKGROUP_SIZE + thread;

        // Load phase: each invocation fetches at most one record and flags
        // whether it survives. Out-of-range invocations contribute 0.
        var command: DrawIndexedIndirect;
        var keep = 0u;
        if index < len {
            command = commands[start + index];
            if command.instance_count > 0u {
                keep = 1u;
            }
        }
        prefix[thread] = keep;
        // The compaction is in place: another invocation may later store into
        // the very slot this invocation just loaded. workgroupBarrier() only
        // orders workgroup-address-space accesses, so a storageBarrier() is
        // required to order this chunk's loads from `commands` before its
        // stores to `commands`.
        storageBarrier();
        workgroupBarrier();

        // Inclusive prefix sum over the keep flags using doubling offsets.
        // The read and the write of `prefix` are separated by a barrier so no
        // invocation observes a partially updated step.
        for (var offset = 1u; offset < WORKGROUP_SIZE; offset = offset << 1u) {
            var addend = 0u;
            if thread >= offset {
                addend = prefix[thread - offset];
            }
            workgroupBarrier();
            prefix[thread] = prefix[thread] + addend;
            workgroupBarrier();
        }

        let inclusive = prefix[thread];
        let chunk_total = prefix[WORKGROUP_SIZE - 1u];
        if keep == 1u {
            // running_base <= chunk * WORKGROUP_SIZE and inclusive <= thread + 1,
            // so dense_index <= index: the store never lands in a slot that a
            // later chunk has yet to read.
            let dense_index = running_base + inclusive - 1u;
            commands[start + dense_index] = command;
        }
        // Everyone must have read running_base and chunk_total before
        // invocation 0 advances the base for the next chunk.
        workgroupBarrier();
        if thread == 0u {
            running_base = running_base + chunk_total;
        }
        workgroupBarrier();
    }

    // Invocation 0 wrote the final running_base itself, so no extra barrier is
    // needed before publishing it.
    if thread == 0u {
        counts[class_index] = running_base;
    }
}