// rend3-routine 0.3.0
//
// Customizable Render Routines for the rend3 rendering library.
// Documentation
/// A span of element indices into the vertex attribute arrays.
///
/// `main` computes the element count as `end - start`, so `end` is treated as
/// exclusive there.
struct Range {
    // Index of the first element in the span
    start: u32;
    // Index one past the last element in the span
    end: u32;
};

/// See documentation for the same struct in skinning/mod.rs
///
/// Per-dispatch parameters telling the shader which vertices to read, where
/// to write the skinned result and which joint matrices to use.
struct GpuSkinningInput {
    // Span of the source vertices; `main` reads positions, normals, tangents,
    // joint indices and joint weights starting at `mesh_range.start`
    mesh_range: Range;
    // Span receiving the skinned vertices; `main` writes positions, normals
    // and tangents starting at `skeleton_range.start`
    skeleton_range: Range;
    // Offset added to every per-vertex joint index before looking up the
    // joint matrix (see `get_joint_matrix`)
    joints_start_idx: u32;
};

/// Wrapper around the runtime-sized array of 4x4 joint matrices. It is
/// indexed with `input.joints_start_idx + joint_idx` in `get_joint_matrix`.
struct JointMatrices {
    matrices: array<mat4x4<f32>>;
};

/// The arrays are tightly packed and `vec3` does not have the right stride
///
/// `Vec3` therefore stores the three components as separate `f32` scalars;
/// use `to_v` / `from_v` to convert to and from `vec3<f32>`.
struct Vec3 { x: f32; y: f32; z: f32; };
/// Runtime-sized array of tightly packed `Vec3` elements, used for the
/// position, normal and tangent buffers.
struct Vec3Array { data: array<Vec3>; };

/// The four joint weights of a single vertex. In `main`, `ws[i]` is the weight
/// applied to the i-th joint index unpacked from `JointIndexVec`.
struct JointWeightVec { ws: array<f32,4>; };
/// Runtime-sized array holding one `JointWeightVec` per vertex.
struct JointWeightVecArray { data: array<JointWeightVec>; };

/// The u16 type does not exist in shaders, so we need to unpack the u16 indices
/// from a pair of u32s
///
/// As unpacked in `main`: the low 16 bits of `indices_0_1` hold joint index 0
/// and the high 16 bits hold joint index 1; `indices_2_3` holds joint indices
/// 2 and 3 in the same way.
struct JointIndexVec { indices_0_1: u32; indices_2_3: u32; };
/// Runtime-sized array holding one `JointIndexVec` per vertex.
struct JointIndexVecArray { data: array<JointIndexVec>; };

// Group 0: vertex attribute buffers and joint matrices.
//
// Positions, normals and tangents are both read (at `input.mesh_range`) and
// written (at `input.skeleton_range`) by `main`.

// Vertex positions
[[group(0), binding(0)]]
var<storage, read_write> positions: Vec3Array;

// Vertex normals
[[group(0), binding(1)]]
var<storage, read_write> normals: Vec3Array;

// Vertex tangents
[[group(0), binding(2)]]
var<storage, read_write> tangents: Vec3Array;

// Per-vertex packed joint indices.
// NOTE(review): only read in this file even though it is declared
// `read_write`; presumably the access mode has to match the host-side bind
// group layout -- confirm before changing it.
[[group(0), binding(3)]]
var<storage, read_write> joint_indices: JointIndexVecArray;

// Per-vertex joint weights.
// NOTE(review): only read in this file even though it is declared
// `read_write`; same caveat as `joint_indices`.
[[group(0), binding(4)]]
var<storage, read_write> joint_weights: JointWeightVecArray;

// Joint matrices; declared without `read_write` and never written in this file
[[group(0), binding(5)]]
var<storage> joint_matrices: JointMatrices;

// Group 1: parameters of the current skinning dispatch
[[group(1), binding(0)]]
var<storage> input : GpuSkinningInput;

/// Converts a tightly packed `Vec3` storage element into a native `vec3<f32>`.
fn to_v(v: Vec3) -> vec3<f32> {
    let unpacked = vec3<f32>(v.x, v.y, v.z);
    return unpacked;
}
/// Converts a native `vec3<f32>` into the tightly packed `Vec3` storage
/// representation, copying the components one by one.
fn from_v(v: vec3<f32>) -> Vec3 {
    var packed_v : Vec3;
    packed_v.x = v.x;
    packed_v.y = v.y;
    packed_v.z = v.z;
    return packed_v;
}


/// Fetches the matrix of joint `joint_idx`, where `joint_idx` is relative to
/// `input.joints_start_idx` inside the shared `joint_matrices` buffer.
fn get_joint_matrix(joint_idx: u32) -> mat4x4<f32> {
    let absolute_idx = input.joints_start_idx + joint_idx;
    return joint_matrices.matrices[absolute_idx];
}

/// Returns, per column of `matrix`, the reciprocal of that column's squared
/// length (`1.0 / dot(column, column)`).
///
/// No guard is applied for zero-length columns.
fn get_inv_scale_squared(matrix: mat3x3<f32>) -> vec3<f32> {
    let col0 = matrix[0];
    let col1 = matrix[1];
    let col2 = matrix[2];

    let len_sq0 = dot(col0, col0);
    let len_sq1 = dot(col1, col1);
    let len_sq2 = dot(col2, col2);

    return vec3<f32>(1.0 / len_sq0, 1.0 / len_sq1, 1.0 / len_sq2);
}

/// Compute entry point: skins one vertex per invocation.
///
/// Reads the vertex at `input.mesh_range.start + global_id.x`, blends it with
/// up to four weighted joint matrices and writes the resulting position,
/// normal and tangent at `input.skeleton_range.start + global_id.x`.
[[stage(compute), workgroup_size(64)]]
fn main([[builtin(global_invocation_id)]] global_id: vec3<u32>) {
    let vertex_idx = global_id.x;

    // Invocations beyond the end of the mesh have nothing to do
    let vertex_count = input.mesh_range.end - input.mesh_range.start;
    if (vertex_idx >= vertex_count) {
        return;
    }

    // Where this vertex is read from and where its skinned version is written
    let src_idx = input.mesh_range.start + vertex_idx;
    let dst_idx = input.skeleton_range.start + vertex_idx;

    // Unpack the four 16-bit joint indices from the two u32 words. This is a
    // `var` so that it can be indexed with the loop counter below.
    // NOTE: This should use bitfieldExtract once it is available on dx12
    let packed_ids = joint_indices.data[src_idx];
    var joint_ids = array<u32,4>(
        packed_ids.indices_0_1 & 0x0000ffffu,
        (packed_ids.indices_0_1 & 0xffff0000u) >> 16u,
        packed_ids.indices_2_3 & 0x0000ffffu,
        (packed_ids.indices_2_3 & 0xffff0000u) >> 16u
    );

    // Unskinned attributes of this vertex
    let src_pos = to_v(positions.data[src_idx]);
    let src_normal = to_v(normals.data[src_idx]);
    let src_tangent = to_v(tangents.data[src_idx]);

    // Weighted sums over the contributing joints
    var skinned_pos = vec3<f32>(0.0, 0.0, 0.0);
    var skinned_normal = vec3<f32>(0.0, 0.0, 0.0);
    var skinned_tangent = vec3<f32>(0.0, 0.0, 0.0);

    for (var i = 0; i < 4; i = i + 1) {
        let w = joint_weights.data[src_idx].ws[i];

        // Joints whose weight is not strictly positive do not contribute
        if (w > 0.0) {
            let m = get_joint_matrix(joint_ids[i]);
            // Upper-left 3x3 part of the joint matrix, used for directions
            let m3 = mat3x3<f32>(m[0].xyz, m[1].xyz, m[2].xyz);
            let inv_scale_sq = get_inv_scale_squared(m3);

            skinned_pos = skinned_pos + (m * vec4<f32>(src_pos, 1.0)).xyz * w;
            // Directions are pre-multiplied by the inverse squared column
            // lengths of the 3x3 part before being transformed.
            // NOTE(review): the tangent gets the same inverse-scale correction
            // as the normal -- confirm this is intended for non-uniformly
            // scaled joints.
            skinned_normal = skinned_normal + (m3 * (inv_scale_sq * src_normal)) * w;
            skinned_tangent = skinned_tangent + (m3 * (inv_scale_sq * src_tangent)) * w;
        }
    }

    // Write to output region of buffer.
    // NOTE(review): if no weight is > 0.0 the accumulators are still zero here
    // and `normalize` is applied to zero vectors.
    positions.data[dst_idx] = from_v(skinned_pos);
    normals.data[dst_idx] = from_v(normalize(skinned_normal));
    tangents.data[dst_idx] = from_v(normalize(skinned_tangent));
}