rend3_routine/
skinning.rs

1use std::{mem, num::NonZeroU64};
2
3use glam::{Mat4, UVec2};
4use rend3::{
5    graph::{DataHandle, RenderGraph},
6    managers::{
7        MeshBuffers, SkeletonManager, VERTEX_JOINT_INDEX_SIZE, VERTEX_JOINT_WEIGHT_SIZE, VERTEX_NORMAL_SIZE,
8        VERTEX_POSITION_SIZE, VERTEX_TANGENT_SIZE,
9    },
10    util::{
11        bind_merge::{BindGroupBuilder, BindGroupLayoutBuilder},
12        math::round_up_div,
13    },
14};
15use wgpu::{
16    BindGroupLayout, BindingType, Buffer, BufferBindingType, BufferDescriptor, BufferUsages, CommandEncoder,
17    ComputePassDescriptor, ComputePipeline, ComputePipelineDescriptor, Device, PipelineLayoutDescriptor,
18    ShaderModuleDescriptor, ShaderStages,
19};
20
21/// The per-skeleton data, as uploaded to the GPU compute shader.
22#[repr(C, align(16))]
23#[derive(Copy, Clone)]
24pub struct GpuSkinningInput {
25    /// See [rend3::managers::GpuVertexRanges].
26    pub mesh_range: UVec2,
27    /// See [rend3::managers::GpuVertexRanges].
28    pub skeleton_range: UVec2,
29    /// The index of this skeleton's first joint in the global joint matrix
30    /// buffer.
31    pub joint_idx: u32,
32}
33
34/// Uploads the data for the GPU skinning compute pass to the GPU
35pub fn add_pre_skin_to_graph(graph: &mut RenderGraph, pre_skin_data: DataHandle<PreSkinningBuffers>) {
36    let mut builder = graph.add_node("pre-skinning");
37    let pre_skin_handle = builder.add_data_output(pre_skin_data);
38
39    builder.build(move |_pt, renderer, _encoder_or_pass, _temps, _ready, graph_data| {
40        let buffers = build_gpu_skinning_input_buffers(&renderer.device, graph_data.skeleton_manager);
41        graph_data.set_data::<PreSkinningBuffers>(pre_skin_handle, Some(buffers));
42    });
43}
44
45/// The two buffers uploaded to the GPU during pre-skinning.
46pub struct PreSkinningBuffers {
47    gpu_skinning_inputs: Buffer,
48    joint_matrices: Buffer,
49}
50
51fn build_gpu_skinning_input_buffers(device: &Device, skeleton_manager: &SkeletonManager) -> PreSkinningBuffers {
52    profiling::scope!("Building GPU Skinning Input Data");
53
54    let skinning_inputs_size = skeleton_manager.skeletons().len() * mem::size_of::<GpuSkinningInput>();
55    let gpu_skinning_inputs = device.create_buffer(&BufferDescriptor {
56        label: Some("skinning inputs"),
57        size: skinning_inputs_size as u64,
58        usage: BufferUsages::STORAGE,
59        mapped_at_creation: true,
60    });
61
62    let joint_matrices = device.create_buffer(&BufferDescriptor {
63        label: Some("joint matrices"),
64        size: (skeleton_manager.global_joint_count() * mem::size_of::<Mat4>()) as u64,
65        usage: BufferUsages::STORAGE,
66        mapped_at_creation: true,
67    });
68
69    let mut skinning_input_data = gpu_skinning_inputs.slice(..).get_mapped_range_mut();
70    let mut joint_matrices_data = joint_matrices.slice(..).get_mapped_range_mut();
71
72    // Skeletons have a variable number of joints, so we need to keep track of
73    // the global index here.
74    let mut joint_matrix_idx = 0;
75
76    // Iterate over the skeletons, fill the buffers
77    for (idx, skeleton) in skeleton_manager.skeletons().enumerate() {
78        // SAFETY: We are always accessing elements in bounds and all accesses are
79        // aligned
80        unsafe {
81            let input = GpuSkinningInput {
82                skeleton_range: skeleton.ranges.skeleton_range,
83                mesh_range: skeleton.ranges.mesh_range,
84                joint_idx: joint_matrix_idx,
85            };
86
87            // The skinning inputs buffer has as many elements as skeletons, so
88            // using the same index as the current skeleton will never access OOB
89            let skin_input_ptr = skinning_input_data.as_mut_ptr() as *mut GpuSkinningInput;
90            skin_input_ptr.add(idx).write_unaligned(input);
91
92            let joint_matrices_ptr = joint_matrices_data.as_mut_ptr() as *mut [[f32; 4]; 4];
93            for joint_matrix in &skeleton.joint_matrices {
94                // Here, the access can't be OOB either: The joint_matrix_idx
95                // will get incremented once for every joint matrix, and the
96                // length of the buffer is exactly the sum of all joint matrix
97                // vector lengths.
98                joint_matrices_ptr
99                    .add(joint_matrix_idx as usize)
100                    .write_unaligned(joint_matrix.to_cols_array_2d());
101                joint_matrix_idx += 1;
102            }
103        }
104    }
105
106    drop(skinning_input_data);
107    drop(joint_matrices_data);
108    gpu_skinning_inputs.unmap();
109    joint_matrices.unmap();
110
111    PreSkinningBuffers {
112        gpu_skinning_inputs,
113        joint_matrices,
114    }
115}
116
117/// Holds the necessary wgpu data structures for the GPU skinning compute pass
118pub struct GpuSkinner {
119    pub pipeline: ComputePipeline,
120    pub vertex_buffers_bgl: BindGroupLayout,
121    pub skinning_inputs_bgl: BindGroupLayout,
122}
123
124impl GpuSkinner {
125    const WORKGROUP_SIZE: u32 = 64;
126
127    pub fn new(device: &wgpu::Device) -> GpuSkinner {
128        let storage_buffer_ty = |read_only, size| BindingType::Buffer {
129            ty: BufferBindingType::Storage { read_only },
130            has_dynamic_offset: false,
131            min_binding_size: size,
132        };
133
134        let pos_size = NonZeroU64::new(VERTEX_POSITION_SIZE as u64);
135        let nrm_size = NonZeroU64::new(VERTEX_NORMAL_SIZE as u64);
136        let tan_size = NonZeroU64::new(VERTEX_TANGENT_SIZE as u64);
137        let j_idx_size = NonZeroU64::new(VERTEX_JOINT_INDEX_SIZE as u64);
138        let j_wt_size = NonZeroU64::new(VERTEX_JOINT_WEIGHT_SIZE as u64);
139        let mat_size = NonZeroU64::new(mem::size_of::<Mat4>() as u64);
140
141        // Bind group 0 contains some vertex buffers bound as storage buffers
142        let vertex_buffers_bgl = BindGroupLayoutBuilder::new()
143            .append(ShaderStages::COMPUTE, storage_buffer_ty(false, pos_size), None) // Positions
144            .append(ShaderStages::COMPUTE, storage_buffer_ty(false, nrm_size), None) // Normals
145            .append(ShaderStages::COMPUTE, storage_buffer_ty(false, tan_size), None) // Tangents
146            .append(ShaderStages::COMPUTE, storage_buffer_ty(false, j_idx_size), None) // Joint indices
147            .append(ShaderStages::COMPUTE, storage_buffer_ty(false, j_wt_size), None) // Joint weights
148            .append(ShaderStages::COMPUTE, storage_buffer_ty(true, mat_size), None) // Matrices
149            .build(device, Some("Gpu skinning mesh data"));
150
151        // Bind group 1 contains the pre skinning inputs. This uses dynamic
152        // offsets because there is one dispatch per input, and the offset is
153        // used to indicate which is the current input to the shader.
154        //
155        // NOTE: This would be an ideal use case for push constants, but they are
156        // not available on all platforms so we need to use this workaround.
157        let skinning_inputs_bgl = BindGroupLayoutBuilder::new()
158            .append(
159                ShaderStages::COMPUTE,
160                BindingType::Buffer {
161                    ty: BufferBindingType::Storage { read_only: true },
162                    has_dynamic_offset: true,
163                    min_binding_size: NonZeroU64::new(mem::size_of::<GpuSkinningInput>() as u64),
164                },
165                None,
166            )
167            .build(device, Some("Gpu skinning inputs"));
168
169        let layout = device.create_pipeline_layout(&PipelineLayoutDescriptor {
170            label: None,
171            bind_group_layouts: &[&vertex_buffers_bgl, &skinning_inputs_bgl],
172            push_constant_ranges: &[],
173        });
174
175        let module = device.create_shader_module(&ShaderModuleDescriptor {
176            label: Some("Gpu skinning compute shader"),
177            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/src/skinning.wgsl").into()),
178        });
179
180        let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor {
181            label: Some("Gpu skinning pipeline"),
182            layout: Some(&layout),
183            module: &module,
184            entry_point: "main",
185        });
186
187        GpuSkinner {
188            vertex_buffers_bgl,
189            skinning_inputs_bgl,
190            pipeline,
191        }
192    }
193
194    pub fn execute_pass(
195        &self,
196        device: &Device,
197        encoder: &mut CommandEncoder,
198        buffers: &PreSkinningBuffers,
199        mesh_buffers: &MeshBuffers,
200        // The number of inputs in the skinning_inputs buffer
201        skeleton_manager: &SkeletonManager,
202    ) {
203        let vertex_buffers_bg = BindGroupBuilder::new()
204            .append_buffer(&mesh_buffers.vertex_position)
205            .append_buffer(&mesh_buffers.vertex_normal)
206            .append_buffer(&mesh_buffers.vertex_tangent)
207            .append_buffer(&mesh_buffers.vertex_joint_index)
208            .append_buffer(&mesh_buffers.vertex_joint_weight)
209            .append_buffer(&buffers.joint_matrices)
210            .build(device, Some("GPU skinning mesh data"), &self.vertex_buffers_bgl);
211
212        let skinning_inputs_bg = BindGroupBuilder::new()
213            // NOTE: Need to specify a binding size to avoid getting the full buffer's.
214            .append_buffer_with_size(&buffers.gpu_skinning_inputs, mem::size_of::<GpuSkinningInput>() as u64)
215            .build(device, Some("GPU skinning inputs"), &self.skinning_inputs_bgl);
216
217        let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor {
218            label: Some("GPU Skinning"),
219        });
220        cpass.set_bind_group(0, &vertex_buffers_bg, &[]);
221
222        for (i, skel) in skeleton_manager.skeletons().enumerate() {
223            cpass.set_pipeline(&self.pipeline);
224
225            let offset = (i * mem::size_of::<GpuSkinningInput>()) as u32;
226            cpass.set_bind_group(1, &skinning_inputs_bg, &[offset]);
227
228            let num_verts = (skel.ranges.mesh_range[1] - skel.ranges.mesh_range[0]) as u32;
229            let num_workgroups = round_up_div(num_verts, Self::WORKGROUP_SIZE);
230            cpass.dispatch(num_workgroups, 1, 1);
231        }
232    }
233}
234
/// Marker for the (virtual) result of the GPU skinning node.
///
/// Skinning works purely through a side effect: the skeleton copies of the
/// vertex buffer are mutated in place, entirely in GPU memory, so nothing is
/// handed back to the CPU. This type exists so that other render graph nodes
/// can depend on it, which makes skinning run at the right time (before any
/// culling happens).
pub struct SkinningOutput;
243
244/// Performs skinning on the GPU.
245pub fn add_skinning_to_graph<'node>(
246    graph: &mut RenderGraph<'node>,
247    gpu_skinner: &'node GpuSkinner,
248    pre_skin_data: DataHandle<PreSkinningBuffers>,
249    skinned_data: DataHandle<SkinningOutput>,
250) {
251    let mut builder = graph.add_node("skinning");
252    let pre_skin_handle = builder.add_data_input(pre_skin_data);
253    let skinned_data_handle = builder.add_data_output(skinned_data);
254
255    let skinner_pt = builder.passthrough_ref(gpu_skinner);
256
257    builder.build(move |pt, renderer, encoder_or_pass, temps, _ready, graph_data| {
258        let skinner = pt.get(skinner_pt);
259        let encoder = encoder_or_pass.get_encoder();
260        let skin_input = graph_data
261            .get_data(temps, pre_skin_handle)
262            .expect("Skinning requires pre-skinning to run first");
263
264        // Avoid running the compute pass if there are no skeletons. This
265        // prevents binding an empty buffer
266        if graph_data.skeleton_manager.skeletons().len() > 0 {
267            skinner.execute_pass(
268                &renderer.device,
269                encoder,
270                skin_input,
271                graph_data.mesh_manager.buffers(),
272                graph_data.skeleton_manager,
273            );
274        }
275
276        graph_data.set_data(skinned_data_handle, Some(SkinningOutput));
277    });
278}