nightshade 0.43.0

A cross-platform data-oriented game engine.
Documentation
#import nightshade::cull_common::{DrawIndexedIndirect, sphere_in_frustum}

// Element type of the `transforms` storage buffer.
struct ModelMatrix {
    // Model matrix; `main` multiplies mesh-space bounds by it to obtain the
    // positions it treats as world space.
    model: mat4x4<f32>,
    // Overwritten by `main` with `compute_normal_matrix(model)` for every
    // object that survives culling.
    normal_matrix: mat3x3<f32>,
};

// Element type of the `objects` storage buffer; one record per object index.
// Fields without a usage note are not referenced anywhere in this file
// (presumably consumed by other passes — verify against the host-side layout).
struct ObjectData {
    // Index into `transforms`.
    transform_index: u32,
    // Index into both `mesh_bounds` and `mesh_lod`.
    mesh_id: u32,
    material_id: u32,
    // Base index into `indirect_commands`; `main` adds the selected LOD level
    // to it. The value 0xFFFFFFFF makes `main` skip the object entirely.
    batch_id: u32,
    morph_weights: array<f32, 8>,
    morph_target_count: u32,
    morph_displacement_offset: u32,
    mesh_vertex_offset: u32,
    mesh_vertex_count: u32,
    entity_id: u32,
    // Non-zero exempts the object from the Hi-Z occlusion test.
    is_overlay: u32,
    // Non-zero exempts the object from the Hi-Z occlusion test.
    skip_occlusion: u32,
    flip_winding: u32,
    // ANDed with `culling.camera_culling_mask`; a zero result skips the object.
    culling_mask: u32,
    // Zero skips the object before any culling test runs.
    visible: u32,
    _pad_culling_1: u32,
    _pad_culling_2: u32,
};

// Bounding sphere of a mesh, indexed by `ObjectData.mesh_id`.
struct MeshBounds {
    // Sphere centre in mesh space; `main` transforms it by the model matrix.
    center: vec3<f32>,
    // Sphere radius in mesh space; `main` multiplies it by the largest axis
    // scale of the model matrix before testing.
    radius: f32,
};

// Per-dispatch parameters for the culling pass.
struct CullingUniforms {
    // Passed unchanged to `sphere_in_frustum`.
    frustum_planes: array<vec4<f32>, 6>,
    // Used by `main` to obtain clip-space w for the screen-size estimate.
    view_projection: mat4x4<f32>,
    // Used by `project_occluder_corners` to project box corners for the Hi-Z test.
    occluder_view_projection: mat4x4<f32>,
    // Used as a pixel-count scale: UV extents are multiplied by it and an
    // 8-pixel padding is divided by it.
    screen_size: vec2<f32>,
    // Invocations with an index >= this value exit immediately.
    object_count: u32,
    // Objects whose estimated screen diameter is below this are culled; the
    // test is skipped when this is <= 0 or when `frustum_enabled` is zero.
    min_screen_pixel_size: f32,
    // Multiplier in the screen-diameter estimate
    // (radius * projection_scale_y * screen_size.y / clip.w).
    projection_scale_y: f32,
    // ANDed with `ObjectData.culling_mask`.
    camera_culling_mask: u32,
    // Stored as f32; the Hi-Z mip level is clamped to [0, hiz_mip_count - 1].
    hiz_mip_count: f32,
    // Non-zero enables the Hi-Z occlusion test.
    occlusion_enabled: u32,
    // Non-zero enables the frustum test and the minimum-pixel-size test.
    frustum_enabled: u32,
};

// Per-transform matrices. Read-write because `main` stores the computed
// normal matrix back for visible objects.
@group(0) @binding(0)
var<storage, read_write> transforms: array<ModelMatrix>;

// Object records, indexed by the invocation's global x id.
@group(0) @binding(1)
var<storage, read> objects: array<ObjectData>;

// Parameters for this culling dispatch.
@group(0) @binding(2)
var<uniform> culling: CullingUniforms;

// Bounding spheres, indexed by `ObjectData.mesh_id`.
@group(0) @binding(3)
var<storage, read> mesh_bounds: array<MeshBounds>;

// Indirect draw records, indexed by batch id plus LOD level. `main` atomically
// increments `instance_count` and reads `first_instance`.
@group(0) @binding(4)
var<storage, read_write> indirect_commands: array<DrawIndexedIndirect>;

// Output list of surviving object indices; `main` writes each one at
// `first_instance + slot`, where `slot` is the value returned by the atomic add.
@group(0) @binding(5)
var<storage, read_write> visible_indices: array<u32>;

// LOD table for a mesh, indexed by `ObjectData.mesh_id`.
struct MeshLodInfo {
    // Number of LOD levels; LOD selection only runs when this is > 1.
    lod_count: u32,
    // Compared against the estimated screen diameter: `main` picks the first
    // level whose threshold is <= that diameter, otherwise the last level.
    thresholds: array<f32, 3>,
};

// LOD tables, indexed by `ObjectData.mesh_id`.
@group(0) @binding(6)
var<storage, read> mesh_lod: array<MeshLodInfo>;

// Depth pyramid read by `is_occluded` with `textureLoad` at an explicit mip
// level; only the red channel is used.
@group(0) @binding(7)
var hiz_texture: texture_2d<f32>;

// Result of projecting a box's eight corners with `project_occluder_corners`.
struct ProjectedAabb {
    // Component-wise minimum of the corner UVs (y is flipped relative to NDC).
    // Starts at 1.0, so it never exceeds 1.0.
    screen_min: vec2<f32>,
    // Component-wise maximum of the corner UVs. Starts at 0.0, so it is never
    // below 0.0.
    screen_max: vec2<f32>,
    // Largest NDC z among the corners (starts at 0.0).
    nearest_z: f32,
    // False when any corner had clip-space w <= 0; the other fields are then
    // only partially accumulated and must not be used.
    valid: bool,
};

// Projects eight box corners through `culling.occluder_view_projection` and
// gathers their UV-space bounding rectangle together with the largest NDC z.
//
// Accumulation stops at the first corner whose clip-space w is <= 0; the result
// is then flagged `valid == false` and holds whatever had been accumulated so far.
fn project_occluder_corners(corners: array<vec3<f32>, 8>) -> ProjectedAabb {
    // Field order: screen_min, screen_max, nearest_z, valid.
    var aabb = ProjectedAabb(vec2<f32>(1.0), vec2<f32>(0.0), 0.0, true);

    for (var corner = 0u; corner < 8u; corner++) {
        let clip_pos = culling.occluder_view_projection * vec4<f32>(corners[corner], 1.0);
        if clip_pos.w <= 0.0 {
            // The perspective divide is not meaningful for this corner.
            aabb.valid = false;
            break;
        }

        let ndc = clip_pos.xyz / clip_pos.w;
        // NDC [-1, 1] -> UV [0, 1], with the vertical axis flipped.
        let u = ndc.x * 0.5 + 0.5;
        let v = 1.0 - (ndc.y * 0.5 + 0.5);
        let uv = vec2<f32>(u, v);

        aabb.screen_min = min(aabb.screen_min, uv);
        aabb.screen_max = max(aabb.screen_max, uv);
        aabb.nearest_z = max(aabb.nearest_z, ndc.z);
    }

    return aabb;
}

// Hi-Z occlusion test for a box given by its eight corners.
//
// Returns true only when the box's largest projected NDC z is below the
// smallest depth stored in the covered Hi-Z texels, minus a bias. Returns false
// (not occluded) when the projection is invalid or the padded screen rectangle
// is empty after clamping to the [0, 1] UV range.
fn is_occluded(corners: array<vec3<f32>, 8>) -> bool {
    let footprint = project_occluder_corners(corners);
    if !footprint.valid {
        return false;
    }

    // Grow the rectangle by 8 pixels on every side, then clamp to the screen.
    let uv_floor = vec2<f32>(0.0);
    let uv_ceil = vec2<f32>(1.0);
    let margin = vec2<f32>(8.0) / culling.screen_size;
    let uv_lo = clamp(footprint.screen_min - margin, uv_floor, uv_ceil);
    let uv_hi = clamp(footprint.screen_max + margin, uv_floor, uv_ceil);
    if any(uv_lo >= uv_hi) {
        return false;
    }

    // Choose the mip level from the rectangle's longer side in pixels, capped
    // to the last available level.
    let extent_px = (uv_hi - uv_lo) * culling.screen_size;
    let longest_side_px = max(extent_px.x, extent_px.y);
    let wanted_level = ceil(log2(max(longest_side_px, 1.0)));
    let level = i32(clamp(wanted_level, 0.0, culling.hiz_mip_count - 1.0));

    let level_size = vec2<f32>(textureDimensions(hiz_texture, level));
    let first_texel = vec2<i32>(0);
    let last_texel = vec2<i32>(level_size) - vec2<i32>(1);
    let texel_lo = clamp(vec2<i32>(floor(uv_lo * level_size)), first_texel, last_texel);
    let texel_hi = clamp(vec2<i32>(floor(uv_hi * level_size)), first_texel, last_texel);

    // Smallest stored depth over every texel the rectangle touches.
    var min_depth = 1.0;
    for (var row = texel_lo.y; row <= texel_hi.y; row++) {
        for (var col = texel_lo.x; col <= texel_hi.x; col++) {
            let texel_depth = textureLoad(hiz_texture, vec2<i32>(col, row), level).r;
            min_depth = min(min_depth, texel_depth);
        }
    }

    // Bias is 2% of the sampled depth, never less than 0.0025.
    let bias = max(min_depth * 0.02, 0.0025);
    return footprint.nearest_z < min_depth - bias;
}

// Returns the inverse-transpose of the upper-left 3x3 of `model`, built from
// the cofactors (cross products of the basis columns) divided by the
// determinant.
//
// Falls back to the identity matrix when the 3x3 is degenerate. Degeneracy is
// judged relative to the product of the three column lengths, so the test does
// not depend on object scale. An absolute cutoff on the determinant would
// misclassify valid small transforms: a uniform scale of 0.002 has a
// determinant of 8e-9 and would lose its rotation.
fn compute_normal_matrix(model: mat4x4<f32>) -> mat3x3<f32> {
    let a = model[0].xyz;
    let b = model[1].xyz;
    let c = model[2].xyz;
    let cofactor_0 = cross(b, c);
    let cofactor_1 = cross(c, a);
    let cofactor_2 = cross(a, b);
    let determinant = dot(a, cofactor_0);

    // |det| / (|a||b||c|) is 1 for orthogonal axes and tends to 0 as the axes
    // become coplanar or one of them collapses. The absolute floor keeps
    // 1.0 / determinant finite in f32.
    let axis_volume = length(a) * length(b) * length(c);
    let degenerate_below = max(axis_volume * 1e-8, 1e-30);
    if abs(determinant) <= degenerate_below {
        return mat3x3<f32>(
            vec3<f32>(1.0, 0.0, 0.0),
            vec3<f32>(0.0, 1.0, 0.0),
            vec3<f32>(0.0, 0.0, 1.0),
        );
    }

    let inverse_determinant = 1.0 / determinant;
    return mat3x3<f32>(
        cofactor_0 * inverse_determinant,
        cofactor_1 * inverse_determinant,
        cofactor_2 * inverse_determinant,
    );
}

// Culling entry point; each invocation handles the object at `global_id.x`.
//
// Steps, in order:
//   1. Early-out for out-of-range indices, unbatched objects
//      (batch_id == 0xFFFFFFFF), culling-mask mismatches and objects flagged
//      invisible.
//   2. Bounding-sphere frustum test (when `culling.frustum_enabled` is set).
//   3. Screen-size estimate, used for minimum-pixel-size culling and LOD choice.
//   4. Hi-Z occlusion test on the transformed bounding box (when enabled and
//      the object is neither an overlay nor flagged to skip occlusion).
//   5. For survivors: store the normal matrix, pick a LOD, reserve a slot in
//      that LOD's indirect command and record the object index.
@compute @workgroup_size(256)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let object_index = global_id.x;

    if object_index >= culling.object_count {
        return;
    }

    let object = objects[object_index];

    if object.batch_id == 0xFFFFFFFFu {
        return;
    }

    if (object.culling_mask & culling.camera_culling_mask) == 0u {
        return;
    }

    if object.visible == 0u {
        return;
    }

    // Transform the bounding sphere: centre by the full model matrix, radius by
    // the largest basis-vector length so the sphere stays conservative under
    // non-uniform scale.
    let bounds = mesh_bounds[object.mesh_id];
    let transform = transforms[object.transform_index];
    let local_center = vec4<f32>(bounds.center, 1.0);
    let world_center = transform.model * local_center;
    let world_pos = world_center.xyz;
    let scale_x = length(vec3<f32>(transform.model[0][0], transform.model[0][1], transform.model[0][2]));
    let scale_y = length(vec3<f32>(transform.model[1][0], transform.model[1][1], transform.model[1][2]));
    let scale_z = length(vec3<f32>(transform.model[2][0], transform.model[2][1], transform.model[2][2]));
    let max_scale = max(max(scale_x, scale_y), scale_z);
    let bounding_radius = max_scale * bounds.radius;

    var visible = culling.frustum_enabled == 0u
        || sphere_in_frustum(culling.frustum_planes, world_pos, bounding_radius);

    // Estimated on-screen diameter in pixels; stays 0 when no estimate applies.
    var screen_diameter = 0.0;
    if visible {
        let clip = culling.view_projection * vec4(world_pos, 1.0);
        if clip.w > 0.0 {
            screen_diameter = bounding_radius * culling.projection_scale_y * culling.screen_size.y / clip.w;
            if culling.frustum_enabled != 0u && culling.min_screen_pixel_size > 0.0 && screen_diameter < culling.min_screen_pixel_size {
                visible = false;
            }
        } else if clip.w > -bounding_radius {
            // The centre is at or behind the w = 0 plane but the sphere reaches
            // in front of it, so the camera is inside or right next to the
            // object. Leaving the diameter at 0 would select the coarsest LOD
            // for the closest possible object, so force the finest one instead.
            // Assumes clip.w is view depth in the same units as the radius
            // (true for a standard perspective projection) — confirm for
            // custom projections.
            screen_diameter = 1.0e30;
        }
    }

    if visible && culling.occlusion_enabled != 0u && object.is_overlay == 0u && object.skip_occlusion == 0u {
        // Build the eight corners of the box enclosing the mesh-space sphere,
        // transformed by the model matrix: one transformed base corner plus
        // the three transformed edge vectors.
        let lo = bounds.center - vec3<f32>(bounds.radius);
        let hi = bounds.center + vec3<f32>(bounds.radius);
        let model = transform.model;
        let edge0 = model[0].xyz * (hi.x - lo.x);
        let edge1 = model[1].xyz * (hi.y - lo.y);
        let edge2 = model[2].xyz * (hi.z - lo.z);
        let base = (model * vec4<f32>(lo, 1.0)).xyz;
        let corners = array<vec3<f32>, 8>(
            base,
            base + edge0,
            base + edge1,
            base + edge0 + edge1,
            base + edge2,
            base + edge0 + edge2,
            base + edge1 + edge2,
            base + edge0 + edge1 + edge2,
        );
        if is_occluded(corners) {
            visible = false;
        }
    }

    if visible {
        transforms[object.transform_index].normal_matrix = compute_normal_matrix(transform.model);

        let lod_info = mesh_lod[object.mesh_id];
        var lod_level = 0u;

        // `thresholds` holds 3 entries, so at most 4 LOD levels can be
        // described. Clamping keeps the loop from indexing past the array when
        // the buffer carries a larger count.
        let lod_count = min(lod_info.lod_count, 4u);

        if lod_count > 1u {
            // Default to the coarsest level, then take the first level whose
            // threshold the object's screen diameter reaches.
            lod_level = lod_count - 1u;
            for (var level = 0u; level < lod_count - 1u; level++) {
                if screen_diameter >= lod_info.thresholds[level] {
                    lod_level = level;
                    break;
                }
            }
        }

        // Reserve a slot in the chosen LOD's draw and record which object
        // occupies it.
        let target_batch = object.batch_id + lod_level;
        let write_index = atomicAdd(&indirect_commands[target_batch].instance_count, 1u);
        let first_instance = indirect_commands[target_batch].first_instance;
        visible_indices[first_instance + write_index] = object_index;
    }
}