rend3-routine 0.3.0

Customizable Render Routines for the rend3 rendering library.
Documentation
#version 440

// Subgroup extensions are only required when building the ATOMIC_CULL variant.
// NOTE(review): no subgroup built-in is called anywhere in the visible part of
// this file; confirm whether structures.glsl or another chunk needs these
// before keeping them as hard requirements.
#ifdef ATOMIC_CULL
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#endif

#include "structures.glsl"

// One-dimensional workgroups of 256 invocations. Every entry point in this
// file uses gl_GlobalInvocationID.x as an element index and bounds-checks it
// against uniforms.object_count before doing any work.
layout(local_size_x = 256) in;

// Per-dispatch parameters read by the culling entry points in this file.
struct CullingUniforms {
    // Multiplied with an object's transform to form its model-view matrix.
    mat4 view;
    // Multiplied with an object's transform to form its model-view-projection matrix.
    mat4 view_proj;
    // Planes that each object's transformed bounding sphere is tested against.
    Frustum frustum;
    // Exclusive upper bound on the element index; invocations at or past it
    // return without reading or writing anything.
    uint object_count;
};

// Binding slots for the output-data and indirect-call buffers.
// Without ATOMIC_CULL, bindings 2 and 3 are taken by the two intermediate
// result buffers declared below, so the output buffers move to 4 and 5.
#ifdef ATOMIC_CULL
#define OBJECT_OUTPUT_DATA_BUFFER_BINDING 2
#define INDIRECT_BUFFER_BINDING 3
#else // ATOMIC_CULL
#define OBJECT_OUTPUT_DATA_BUFFER_BINDING 4
#define INDIRECT_BUFFER_BINDING 5
#endif // ATOMIC_CULL

// Read-only array of per-object input records, indexed by
// gl_GlobalInvocationID.x in the entry points below.
layout(set = 0, binding = 0, std430) restrict readonly buffer ObjectInputDataBuffer {
    ObjectInputData object_input[];
};
// Uniform block exposing the CullingUniforms for this dispatch as `uniforms`.
layout(set = 0, binding = 1) uniform ObjectInputUniforms {
    CullingUniforms uniforms;
};
// Scratch buffers, declared only when ATOMIC_CULL is not defined.
// Each word packs a visibility flag in bit 31 and a count in bits 0-30.
// PREFIX_CULL writes result_index_a; PREFIX_SUM reads result_index_a and
// writes result_index_b; PREFIX_OUTPUT reads result_index_a.
// NOTE(review): since PREFIX_SUM always writes B and the later stages read A,
// the host presumably swaps which buffer is bound to each slot between
// passes -- confirm against the dispatch code.
#ifndef ATOMIC_CULL
layout(set = 0, binding = 2, std430) restrict buffer IntermediateBufferA {
    uint result_index_a[];
};
layout(set = 0, binding = 3, std430) restrict buffer IntermediateBufferB {
    uint result_index_b[];
};
#endif
// Per-draw output records filled in by write_output(). The slot index used
// here is also stored as base_instance in the matching indirect call.
layout(set = 0, binding = OBJECT_OUTPUT_DATA_BUFFER_BINDING, std430) restrict buffer ObjectOutputDataBuffer {
    ObjectOutputData object_output[];
};
// Indirect draw buffer: a draw count followed by the array of draw calls.
layout(set = 0, binding = INDIRECT_BUFFER_BINDING, std430) restrict buffer IndirectBuffer {
    // Number of draw calls emitted. Incremented atomically by the ATOMIC_CULL
    // entry point; written once by the last in-range invocation in PREFIX_OUTPUT.
    uint draw_call_count;
    // Three unused words. Presumably padding so the call array starts 16 bytes
    // into the buffer -- confirm against the host-side layout.
    uint pad0;
    uint pad1;
    uint pad2;
    // One entry per emitted draw, written by write_output().
    IndirectCall indirect_call[];
};

// Evaluates the plane equation stored in `plane.inner` at `location`:
// the xyz part is dotted with the point and the w part is added as the offset.
float plane_distance(Plane plane, vec3 location) {
    vec4 coefficients = plane.inner;
    float projected = dot(coefficients.xyz, location);
    return projected + coefficients.w;
}

// Returns true when `sphere` (xyz = center, w = radius) is at least partially
// on the inner side of the left, right, top, bottom and near planes of
// `frustum`. No far-plane test is performed here.
//
// Each test is phrased as `distance >= -radius` so that a comparison which
// evaluates to false for any reason (as one involving a NaN distance would be
// expected to) rejects the sphere rather than accepting it.
bool frustum_contains_sphere(Frustum frustum, vec4 sphere) {
    vec3 center = sphere.xyz;
    float min_distance = -sphere.w;

    // Short-circuit evaluation keeps the original plane order and early-out.
    return plane_distance(frustum.left, center) >= min_distance
        && plane_distance(frustum.right, center) >= min_distance
        && plane_distance(frustum.top, center) >= min_distance
        && plane_distance(frustum.bottom, center) >= min_distance
        && plane_distance(frustum.near, center) >= min_distance;
}

// Applies `transform` to a bounding sphere (xyz = center, w = radius).
// The center is transformed as a point (w = 1); the radius is multiplied by
// the longest of the first three column vectors' xyz parts, so the result is
// conservative when the three lengths differ.
vec4 transform_sphere(vec4 sphere, mat4 transform) {
    float scale_x = length(transform[0].xyz);
    float scale_y = length(transform[1].xyz);
    float scale_z = length(transform[2].xyz);
    float largest_scale = max(max(scale_x, scale_y), scale_z);

    vec3 moved_center = (transform * vec4(sphere.xyz, 1.0)).xyz;

    return vec4(moved_center, sphere.w * largest_scale);
}

// Dot product of a vector with itself, i.e. its squared length.
float self_dot(vec3 me) {
    float squared_length = dot(me, me);
    return squared_length;
}

// Returns, per column of `matrix`, the reciprocal of that column's squared
// length. A zero-length column is not special-cased.
vec3 inv_squared_scale(mat3 matrix) {
    vec3 squared_scale = vec3(
        self_dot(matrix[0].xyz),
        self_dot(matrix[1].xyz),
        self_dot(matrix[2].xyz)
    );

    return 1.0 / squared_scale;
}

// Emits one draw for `in_data` into output slot `index`.
//
// Writes the object's derived matrices and material index to
// object_output[index], then writes a single-instance indirect call to
// indirect_call[index]. The call's base_instance is set to `index`, the same
// slot the output record was stored in.
void write_output(ObjectInputData in_data, uint index) {
    // Per-object record; fields are assigned individually because only these
    // four are populated here.
    ObjectOutputData record;
    record.model_view = uniforms.view * in_data.transform;
    record.model_view_proj = uniforms.view_proj * in_data.transform;
    record.inv_squared_scale = inv_squared_scale(mat3(record.model_view));
    record.material_idx = in_data.material_idx;
    object_output[index] = record;

    // Matching indirect draw, always exactly one instance.
    IndirectCall draw;
    draw.vertex_count = in_data.count;
    draw.instance_count = 1;
    draw.base_index = in_data.start_idx;
    draw.vertex_offset = in_data.vertex_offset;
    draw.base_instance = index;
    indirect_call[index] = draw;
}

#ifdef ATOMIC_CULL
// Single-pass culling: each invocation tests one object and, if it is visible,
// claims the next output slot by atomically incrementing draw_call_count.
// Slots are handed out in whatever order invocations reach the atomic add.
// NOTE(review): relies on draw_call_count holding the intended starting value
// (presumably zero) before the dispatch -- confirm on the host side.
void main() {
    uint object_idx = gl_GlobalInvocationID.x;

    if (object_idx < uniforms.object_count) {
        ObjectInputData object = object_input[object_idx];

        // Bounding sphere moved into the space of uniforms.view * transform,
        // which is where it is compared against uniforms.frustum.
        mat4 object_to_view = uniforms.view * object.transform;
        vec4 view_sphere = transform_sphere(object.bounding_sphere, object_to_view);

        if (frustum_contains_sphere(uniforms.frustum, view_sphere)) {
            uint slot = atomicAdd(draw_call_count, 1);
            write_output(object, slot);
        }
    }
}
#endif // ATOMIC_CULL

#ifdef PREFIX_CULL
// First stage of the prefix-sum path: tests one object per invocation and
// stores a packed word in result_index_a. Bit 31 holds the visibility flag and
// bits 0-30 hold the same 0/1 value as the starting count for the later sum.
// Every in-range invocation writes, so culled objects store zero.
void main() {
    uint object_idx = gl_GlobalInvocationID.x;

    if (object_idx < uniforms.object_count) {
        ObjectInputData object = object_input[object_idx];

        mat4 object_to_view = uniforms.view * object.transform;
        vec4 view_sphere = transform_sphere(object.bounding_sphere, object_to_view);

        uint flag = uint(frustum_contains_sphere(uniforms.frustum, view_sphere));

        // Flag into the top bit first, then the same value into the low 31 bits.
        uint flag_only = bitfieldInsert(0, flag, 31, 1);
        uint packed = bitfieldInsert(flag_only, flag, 0, 31);

        result_index_a[object_idx] = packed;
    }
}
#endif // PREFIX_CULL

#ifdef PREFIX_SUM
// Distance, in elements, between an entry and the earlier entry added to it.
layout(push_constant) uniform PushConstants {
    uint stride;
};

// One summation pass from result_index_a into result_index_b.
// Entries below `stride` are copied unchanged. Every other entry keeps its
// bit-31 flag and gets its low 31 bits replaced by the sum of its own low
// 31 bits and those of the entry `stride` positions earlier.
// NOTE(review): presumably dispatched repeatedly with a growing stride and the
// two buffers swapped between passes -- that logic is not visible here.
void main() {
    uint idx = gl_GlobalInvocationID.x;

    if (idx >= uniforms.object_count) {
        return;
    }

    uint packed = result_index_a[idx];

    if (idx >= stride) {
        uint earlier = result_index_a[idx - stride];
        uint combined = bitfieldExtract(packed, 0, 31) + bitfieldExtract(earlier, 0, 31);
        // Only the low 31 bits of the sum are stored; bit 31 is left as it was.
        packed = bitfieldInsert(packed, combined, 0, 31);
    }

    result_index_b[idx] = packed;
}
#endif // PREFIX_SUM

#ifdef PREFIX_OUTPUT
// Final stage of the prefix-sum path: reads the packed word for one object
// from result_index_a and, if its bit-31 flag is set, emits the object into
// slot (count - 1), where count is the value in bits 0-30.
// The invocation handling the last object also stores its count as the total
// draw_call_count, whether or not that object itself is flagged.
void main() {
    uint object_idx = gl_GlobalInvocationID.x;

    if (object_idx >= uniforms.object_count) {
        return;
    }

    uint packed = result_index_a[object_idx];
    uint running_count = bitfieldExtract(packed, 0, 31);

    uint last_idx = uniforms.object_count - 1;
    if (object_idx == last_idx) {
        draw_call_count = running_count;
    }

    bool flagged = bool(bitfieldExtract(packed, 31, 1));
    if (flagged) {
        // The slot is only computed for flagged objects.
        write_output(object_input[object_idx], running_count - 1);
    }
}
#endif // PREFIX_OUTPUT