// GPU-driven culling compute shader.
//
// Dispatch 1 - cull_instances (workgroup_size 64):
// One thread per instance. Tests the world-space AABB against the 6 frustum
// planes. On pass: atomically claims a visibility slot and writes the instance
// index into the visibility buffer.
//
// Dispatch 2 - write_indirect_args (workgroup_size 64):
// One thread per batch. Reads the final visible count from the batch counter,
// writes one DrawIndexedIndirect entry, then zeroes the counter for the next
// frame.
//
// The two dispatches can be recorded back-to-back, even within a single
// compute pass: WebGPU treats each dispatch as its own usage scope and
// synchronizes storage writes between dispatches, so dispatch 2 is
// guaranteed to see all writes from dispatch 1.
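//
// The host is expected to dispatch ceil(instance_count / 64) workgroups for
// pass 1 and ceil(batch_count / 64) for pass 2 (implied by the workgroup
// size; the bounds checks below make over-dispatch safe). The render pass
// then issues one draw_indexed_indirect per batch, or a single
// multi_draw_indexed_indirect where that feature is available.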
//
// All buffers share a single bind group (group 0).
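//
// Plane convention: a point p is inside a plane when dot(normal, p) + distance
// >= 0, i.e. normals point into the frustum. The planes are assumed to be
// extracted and normalized on the CPU (e.g. from the rows of the combined
// view-projection matrix, Gribb/Hartmann style).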
struct FrustumPlane {
normal: vec3<f32>,
distance: f32,
}
struct FrustumUniform {
planes: array<FrustumPlane, 6>,
instance_count: u32,
batch_count: u32,
_pad: vec2<u32>,
}
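// batch_index deliberately sits in the pad slot after the vec3 min (vec3 has
// 16-byte alignment but 12-byte size), keeping the struct at a 32-byte stride.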
struct InstanceAabb {
min: vec3<f32>,
batch_index: u32,
max: vec3<f32>,
_pad: u32,
}
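// Per-batch draw metadata. vis_offset is the batch's starting slot in
// visibility_indices and doubles as first_instance in the indirect draw;
// instance_count is the batch's total pre-cull instance count, bounding its
// visibility region. instance_offset and is_transparent are not read by this
// shader; presumably they serve the CPU/render side.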
struct BatchMeta {
index_count: u32,
first_index: u32,
instance_offset: u32,
instance_count: u32,
vis_offset: u32,
is_transparent: u32,
_pad0: u32,
_pad1: u32,
}
// Matches the wgpu DrawIndexedIndirect layout:
// index_count, instance_count, first_index, base_vertex (i32), first_instance.
struct DrawIndirect {
index_count: u32,
instance_count: u32,
first_index: u32,
base_vertex: i32,
first_instance: u32,
}
@group(0) @binding(0) var<uniform> frustum: FrustumUniform;
@group(0) @binding(1) var<storage, read> instance_aabbs: array<InstanceAabb>;
@group(0) @binding(2) var<storage, read> batch_metas: array<BatchMeta>;
@group(0) @binding(3) var<storage, read_write> batch_counters: array<atomic<u32>>;
@group(0) @binding(4) var<storage, read_write> visibility_indices: array<u32>;
@group(0) @binding(5) var<storage, read_write> indirect_args: array<DrawIndirect>;
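// Expected sizing: batch_counters and indirect_args hold one element per
// batch; visibility_indices holds the sum of instance_count over all batches,
// each batch's region starting at its vis_offset. Counters begin at zero
// (WebGPU zero-initializes buffers) and write_indirect_args re-zeroes them
// every frame.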
// Returns true if the AABB is entirely on the outer (negative) side of the plane.
// Uses the positive-vertex method: take the corner most aligned with the plane
// normal; if that corner is still outside, the whole AABB is outside.
fn aabb_outside_plane(aabb_min: vec3<f32>, aabb_max: vec3<f32>, plane: FrustumPlane) -> bool {
let n = plane.normal;
let px = select(aabb_min.x, aabb_max.x, n.x >= 0.0);
let py = select(aabb_min.y, aabb_max.y, n.y >= 0.0);
let pz = select(aabb_min.z, aabb_max.z, n.z >= 0.0);
return dot(n, vec3<f32>(px, py, pz)) + plane.distance < 0.0;
}
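// Worked example: for a plane n = (0, 0, 1), d = 0 (inside means z >= 0) and
// an AABB spanning min = (-1, -1, -5) to max = (1, 1, -2), the p-vertex is
// (1, 1, -2) and dot(n, p) + d = -2.0 < 0, so the whole box is culled.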
@compute @workgroup_size(64)
fn cull_instances(@builtin(global_invocation_id) id: vec3<u32>) {
let i = id.x;
if i >= frustum.instance_count {
return;
}
let aabb = instance_aabbs[i];
// Reject if outside any of the 6 frustum planes.
for (var p = 0u; p < 6u; p++) {
if aabb_outside_plane(aabb.min, aabb.max, frustum.planes[p]) {
return;
}
}
// Visible: atomically claim a slot in the visibility buffer for this batch.
let b = aabb.batch_index;
let slot = atomicAdd(&batch_counters[b], 1u);
let bmeta = batch_metas[b];
// Guard against out-of-bounds writes in case the CPU-side instance data and
// batch metadata ever disagree. The counter itself can still run past
// instance_count; write_indirect_args clamps it before the draw.
if slot < bmeta.instance_count {
visibility_indices[bmeta.vis_offset + slot] = i;
}
}
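// Note: atomic slot claims land in nondeterministic order, so a batch's
// visible instances are unordered within its visibility region. That is fine
// for opaque batches; transparent batches (see BatchMeta.is_transparent) need
// their draw ordering handled elsewhere.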
@compute @workgroup_size(64)
fn write_indirect_args(@builtin(global_invocation_id) id: vec3<u32>) {
let b = id.x;
if b >= frustum.batch_count {
return;
}
let bmeta = batch_metas[b];
// Clamp to the batch's capacity: cull_instances may bump a counter past
// instance_count (the slot write is guarded, the atomicAdd is not), and an
// unclamped count would make the draw read past the batch's visibility region.
let visible_count = min(atomicLoad(&batch_counters[b]), bmeta.instance_count);
// Write one DrawIndexedIndirect struct.
// first_instance = vis_offset so the vertex shader indexes into
// visibility_indices starting at the right offset for this batch.
// (Requires INDIRECT_FIRST_INSTANCE device feature.)
indirect_args[b] = DrawIndirect(
bmeta.index_count,
visible_count,
bmeta.first_index,
0i,
bmeta.vis_offset,
);
// Zero the counter ready for the next frame's cull_instances dispatch.
atomicStore(&batch_counters[b], 0u);
}
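// A minimal sketch of the consuming vertex shader, assuming a hypothetical
// group(1) layout with a read-only view of the visibility buffer plus
// illustrative transform bindings (vis_indices, model_transforms, and
// view_proj are not part of this pipeline). Because first_instance is set to
// vis_offset above, @builtin(instance_index) already starts at the batch's
// slice of visibility_indices, so recovering the original instance index is a
// single indirection.
@group(1) @binding(0) var<storage, read> vis_indices: array<u32>;
@group(1) @binding(1) var<storage, read> model_transforms: array<mat4x4<f32>>;
@group(1) @binding(2) var<uniform> view_proj: mat4x4<f32>;
@vertex
fn vs_main(
@builtin(instance_index) ii: u32,
@location(0) position: vec3<f32>,
) -> @builtin(position) vec4<f32> {
// instance_index begins at first_instance (= vis_offset), so this read maps
// the compacted visibility slot back to the original instance index.
let instance = vis_indices[ii];
return view_proj * model_transforms[instance] * vec4<f32>(position, 1.0);
}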