use crate::ecs::mesh::components::{
Vertex, create_cone_mesh, create_cube_mesh, create_cylinder_mesh, create_plane_mesh,
create_sphere_mesh, create_subdivided_plane_mesh, create_torus_mesh,
};
use crate::ecs::prefab::resources::mesh_cache_iter;
use super::super::types::{
BUFFER_GROWTH_FACTOR, BUFFER_SHRINK_THRESHOLD, BatchDesc, BatchKey,
COMPACTION_MIN_RECLAIM_BYTES, DrawIndexedIndirect, MAX_INSTANCES, ModelMatrix,
NUM_DRAW_CLASSES,
};
use super::super::world_state::BatchRange;
use super::MeshPass;
impl MeshPass {
/// Rebuilds the per-class batch lists, batch descriptors and batch keys from the
/// `(class, mesh, material) -> count` entries in `state().combos`, grows the GPU
/// buffers that hold them when they are too small, and uploads the new data.
///
/// * `device` - used to allocate replacement buffers when a resize is needed.
/// * `queue` - used to upload the batch descriptors and batch keys.
///
/// # Panics
///
/// Indexing `lists[class as usize]` panics if a combo's class is not below
/// `NUM_DRAW_CLASSES`, and the fixed `lists[0]..=lists[8]` accesses require
/// `NUM_DRAW_CLASSES` to be at least 9.
pub(in super::super) fn build_lists_from_combos(
&mut self,
device: &wgpu::Device,
queue: &wgpu::Queue,
) {
// Cloned so the ids can be consulted while `self` is borrowed again below.
let mask_ids = self.state().cached_mask_material_ids.clone();
// Flatten the combo map into (class, mesh, material, count) tuples.
let mut sorted: Vec<(u32, u32, u32, u32)> = self
.state()
.combos
.iter()
.map(|(&(class, mesh, material), &count)| (class, mesh, material, count))
.collect();
// Order by class, then materials not in `mask_ids` (key 0) before those in it
// (key 1), then by mesh and material.
sorted.sort_by_key(|&(class, mesh, material, _)| {
(
class,
u32::from(mask_ids.contains(&material)),
mesh,
material,
)
});
let mut lists: [Vec<BatchRange>; NUM_DRAW_CLASSES] = Default::default();
let mut descs: Vec<BatchDesc> = Vec::new();
let mut keys: Vec<BatchKey> = Vec::new();
let mut prepass_batch_counts = [0u32; NUM_DRAW_CLASSES];
for (class, mesh, material, count) in sorted {
// Index of the first descriptor emitted for this combo.
let base_slot = descs.len() as u32;
// A mesh with a non-empty LOD id list gets one descriptor and one list entry
// per LOD mesh id; any other mesh gets exactly one. Every descriptor is
// given the combo's full `count` as its capacity.
let added = match self
.mesh_lod_mesh_ids
.get(mesh as usize)
.filter(|ids| !ids.is_empty())
{
Some(lod_ids) => {
for &lod_mesh in lod_ids {
descs.push(BatchDesc {
mesh_geo_id: lod_mesh,
capacity: count,
});
lists[class as usize].push((lod_mesh, material, 0, 0));
}
lod_ids.len() as u32
}
None => {
descs.push(BatchDesc {
mesh_geo_id: mesh,
capacity: count,
});
lists[class as usize].push((mesh, material, 0, 0));
1
}
};
// Only batches whose material is not in `mask_ids` count towards the
// per-class prepass total.
if !mask_ids.contains(&material) {
prepass_batch_counts[class as usize] += added;
}
// One key per combo, keyed on the original (non-LOD) mesh id.
keys.push(BatchKey {
pipeline_class: class,
mesh_id: mesh,
material_id: material,
base_slot,
});
}
// Keys are re-sorted without the mask bit, so their order can differ from `descs`.
keys.sort_by_key(|key| (key.pipeline_class, key.mesh_id, key.material_id));
let total_batch_count = descs.len();
{
// Publish the per-class lists; the list index is the draw class.
let state = self.state_mut();
state.opaque_instances = std::mem::take(&mut lists[0]);
state.opaque_double_sided_instances = std::mem::take(&mut lists[1]);
state.transparent_instances = std::mem::take(&mut lists[2]);
state.overlay_opaque_instances = std::mem::take(&mut lists[3]);
state.overlay_opaque_double_sided_instances = std::mem::take(&mut lists[4]);
state.overlay_transparent_instances = std::mem::take(&mut lists[5]);
state.instanced_opaque_batches = std::mem::take(&mut lists[6]);
state.instanced_opaque_double_sided_batches = std::mem::take(&mut lists[7]);
state.instanced_transparent_batches = std::mem::take(&mut lists[8]);
state.indirect_reset_count = total_batch_count;
state.prepass_batch_counts = prepass_batch_counts;
}
// The indirect, indirect-reset and batch-descriptor buffers share one element
// capacity (`indirect_buffer_size`) and are reallocated together.
// NOTE(review): the new capacity is clamped to MAX_INSTANCES, so if
// `total_batch_count` ever exceeds it the buffers stay smaller than `descs` and
// the upload below would not fit - confirm that callers bound the batch count.
if total_batch_count > self.gpu().indirect_buffer_size {
let new_size = std::cmp::min(
(total_batch_count as f32 * BUFFER_GROWTH_FACTOR).ceil() as usize,
MAX_INSTANCES,
);
let gpu_mut = self.gpu_mut();
gpu_mut.indirect_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Indirect Draw Buffer (Resized)"),
size: (std::mem::size_of::<DrawIndexedIndirect>() * new_size) as u64,
usage: wgpu::BufferUsages::INDIRECT
| wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
gpu_mut.indirect_buffer_size = new_size;
gpu_mut.indirect_reset_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Indirect Reset Buffer (Resized)"),
size: (std::mem::size_of::<DrawIndexedIndirect>() * new_size) as u64,
usage: wgpu::BufferUsages::COPY_SRC
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::STORAGE,
mapped_at_creation: false,
});
gpu_mut.batch_descs_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Batch Descriptor Buffer (Resized)"),
size: (std::mem::size_of::<BatchDesc>() * new_size) as u64,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
// Drop the cached bind groups; presumably they reference the replaced
// buffers and are recreated elsewhere - confirm against their builders.
gpu_mut.culling_bind_group = None;
gpu_mut.cluster_assign_bind_group = None;
gpu_mut.compaction_bind_group = None;
gpu_mut.indirect_build_bind_group = None;
gpu_mut.batch_assign_bind_group = None;
}
// Grow the batch-key buffer (unclamped) when there are more keys than it holds.
if keys.len() > self.gpu().batch_key_buffer_size {
let new_size = (keys.len() as f32 * BUFFER_GROWTH_FACTOR).ceil() as usize;
self.gpu_mut().batch_key_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Batch Key Buffer (Resized)"),
size: (std::mem::size_of::<BatchKey>() * new_size) as u64,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
self.gpu_mut().batch_key_buffer_size = new_size;
self.gpu_mut().batch_assign_bind_group = None;
}
// The visible-indices buffer needs one `u32` per unit of descriptor capacity;
// its current element count is derived from the buffer's byte size.
let total_visible_slots: usize = descs.iter().map(|d| d.capacity as usize).sum();
let visible_indices_size =
(self.gpu().visible_indices_buffer.size() / std::mem::size_of::<u32>() as u64) as usize;
if total_visible_slots > visible_indices_size {
let new_size = (total_visible_slots as f32 * BUFFER_GROWTH_FACTOR).ceil() as usize;
self.gpu_mut().visible_indices_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Visible Indices Buffer (Resized)"),
size: (std::mem::size_of::<u32>() * new_size) as u64,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
self.rebuild_instance_bind_group(device);
self.gpu_mut().culling_bind_group = None;
}
// Upload the new descriptors and keys; empty slices are skipped.
if !descs.is_empty() {
queue.write_buffer(
&self.gpu().batch_descs_buffer,
0,
bytemuck::cast_slice(&descs),
);
}
if !keys.is_empty() {
queue.write_buffer(&self.gpu().batch_key_buffer, 0, bytemuck::cast_slice(&keys));
}
self.gpu_mut().batch_key_count = keys.len() as u32;
}
/// Reports whether this frame can be served by re-batching the cached entities.
///
/// Returns `true` only when every one of the following holds:
/// - frame dirty-state exists and has `frame_initialized` set;
/// - no full rebuild is requested, instanced meshes did not change, and no
///   entities were added, removed, or marked material-dirty;
/// - `batches_invalidated` is set;
/// - the state for `current_world_id` exists, has at least one cached entity,
///   and has GPU buffers.
pub(in super::super) fn can_do_rebatch_only(&self) -> bool {
    let Some(dirty) = self.frame_dirty.as_ref() else {
        return false;
    };

    // Any of these changes rules out the rebatch-only path.
    let structural_change = dirty.full_rebuild_needed
        || dirty.instanced_meshes_changed
        || !dirty.entities_added.is_empty()
        || !dirty.entities_removed.is_empty()
        || !dirty.material_dirty.is_empty();

    let rebatch_requested = dirty.frame_initialized && dirty.batches_invalidated;
    if structural_change || !rebatch_requested {
        return false;
    }

    match self.world_state_get(self.current_world_id) {
        Some(cache) => cache.gpu_buffers.is_some() && !cache.cached_entities.is_empty(),
        None => false,
    }
}
/// Refreshes the transforms of entities flagged transform-dirty, re-uploads the
/// cached object data, and rebuilds the batch lists.
///
/// The pending `transform_dirty` collection is taken (left empty) from the frame
/// dirty-state. For each taken entity that has a GPU slot and a global transform,
/// a `ModelMatrix` with an all-zero `normal_matrix` is stored in
/// `cached_transforms` (when the slot is within its length) and written to the
/// transform buffer at the slot's offset. The whole `cached_objects` array is then
/// written to the object buffer, and `build_lists_from_combos` is called.
///
/// # Panics
///
/// Panics if `current_world_id` is out of range for `world_states`, or if that
/// world's state or its `gpu_buffers` is `None`.
pub(in super::super) fn rebatch_cached_entities(
    &mut self,
    world: &crate::ecs::world::World,
    device: &wgpu::Device,
    queue: &wgpu::Queue,
) {
    const MATRIX_STRIDE: u64 = std::mem::size_of::<ModelMatrix>() as u64;

    let moved_entities = match self.frame_dirty.as_mut() {
        Some(dirty) => std::mem::take(&mut dirty.transform_dirty),
        None => Default::default(),
    };

    let world_index = self.current_world_id as usize;
    let cache = self.world_states[world_index].as_mut().unwrap();
    let buffers = cache.gpu_buffers.as_ref().unwrap();

    for entity in &moved_entities {
        // Entities without a GPU slot or without a global transform are skipped.
        let Some(&slot) = cache.gpu_registry.entity_to_slot.get(entity) else {
            continue;
        };
        let Some(transform) = world.core.get_global_transform(*entity) else {
            continue;
        };

        let model_matrix = ModelMatrix {
            model: transform.0.into(),
            normal_matrix: [[0.0; 4]; 3],
        };

        // The CPU-side mirror is only updated for slots it already covers; the
        // GPU write below happens regardless.
        if let Some(cached) = cache.cached_transforms.get_mut(slot as usize) {
            *cached = model_matrix;
        }

        queue.write_buffer(
            &buffers.transform_buffer,
            (slot as u64) * MATRIX_STRIDE,
            bytemuck::bytes_of(&model_matrix),
        );
    }

    queue.write_buffer(
        &buffers.object_buffer,
        0,
        bytemuck::cast_slice(&cache.cached_objects),
    );

    self.build_lists_from_combos(device, queue);
}
/// Records one multi-draw-indirect call covering `batch_count` consecutive
/// commands of `indirect_buffer`, starting at command index `batch_offset`.
///
/// Does nothing when `batch_count` is zero. When `supports_count` is true the
/// count variant is recorded, reading the draw count from `count_buffer` at
/// `class_index * size_of::<u32>()` with `batch_count` as the maximum; otherwise
/// the plain variant is recorded with `batch_count` draws.
pub(in super::super) fn draw_batches<'a>(
    pass: &mut wgpu::RenderPass<'a>,
    batch_count: usize,
    batch_offset: usize,
    class_index: usize,
    supports_count: bool,
    indirect_buffer: &'a wgpu::Buffer,
    count_buffer: &'a wgpu::Buffer,
) {
    const COMMAND_STRIDE: usize = std::mem::size_of::<DrawIndexedIndirect>();
    const COUNT_STRIDE: usize = std::mem::size_of::<u32>();

    if batch_count == 0 {
        return;
    }

    let first_command_offset = (batch_offset * COMMAND_STRIDE) as u64;
    let draw_limit = batch_count as u32;

    if !supports_count {
        pass.multi_draw_indexed_indirect(indirect_buffer, first_command_offset, draw_limit);
        return;
    }

    let count_offset = (class_index * COUNT_STRIDE) as u64;
    pass.multi_draw_indexed_indirect_count(
        indirect_buffer,
        first_command_offset,
        count_buffer,
        count_offset,
        draw_limit,
    );
}
/// Returns the ratio of vertex bytes referenced by `mesh_data` to
/// `vertex_buffer_size`, or `1.0` when the buffer size is zero.
pub(in super::super) fn compute_vertex_utilization(&self) -> f32 {
    const VERTEX_STRIDE: u64 = std::mem::size_of::<Vertex>() as u64;

    let mut vertices_in_use: u64 = 0;
    for mesh in self.mesh_data.iter() {
        vertices_in_use += mesh.vertex_count as u64;
    }
    let bytes_in_use = vertices_in_use * VERTEX_STRIDE;

    match self.vertex_buffer_size {
        0 => 1.0,
        capacity => bytes_in_use as f32 / capacity as f32,
    }
}
/// Returns the ratio of index bytes referenced by `mesh_data` to
/// `index_buffer_size`, or `1.0` when the buffer size is zero.
pub(in super::super) fn compute_index_utilization(&self) -> f32 {
    const INDEX_STRIDE: u64 = std::mem::size_of::<u32>() as u64;

    let mut indices_in_use: u64 = 0;
    for mesh in self.mesh_data.iter() {
        indices_in_use += mesh.index_count as u64;
    }
    let bytes_in_use = indices_in_use * INDEX_STRIDE;

    match self.index_buffer_size {
        0 => 1.0,
        capacity => bytes_in_use as f32 / capacity as f32,
    }
}
/// Rebuilds the mesh registries and shrinks the vertex/index buffers when they are
/// sufficiently under-utilised.
///
/// Compaction runs when vertex or index utilisation is below
/// `BUFFER_SHRINK_THRESHOLD`, at least `COMPACTION_MIN_RECLAIM_BYTES` could be
/// reclaimed across both buffers, and both buffers currently hold some mesh data.
///
/// * `device` / `queue` - forwarded to `add_mesh` and used to allocate the smaller
///   buffers and submit the copies into them.
/// * `mesh_cache` - every cached mesh whose name is not already registered after
///   the built-in shapes is re-added.
///
/// Returns `true` when the registries were rebuilt, even if neither buffer ended
/// up being reallocated.
pub(in super::super) fn check_and_compact_buffers(
&mut self,
device: &wgpu::Device,
queue: &wgpu::Queue,
mesh_cache: &crate::ecs::prefab::resources::MeshCache,
) -> bool {
// Bytes referenced by the currently registered meshes.
let actual_vertex_count: u64 = self.mesh_data.iter().map(|m| m.vertex_count as u64).sum();
let actual_index_count: u64 = self.mesh_data.iter().map(|m| m.index_count as u64).sum();
let actual_vertex_bytes = actual_vertex_count * std::mem::size_of::<Vertex>() as u64;
let actual_index_bytes = actual_index_count * std::mem::size_of::<u32>() as u64;
// Same ratios as `compute_vertex_utilization` / `compute_index_utilization`;
// a zero-sized buffer counts as fully utilised.
let vertex_utilization = if self.vertex_buffer_size > 0 {
actual_vertex_bytes as f32 / self.vertex_buffer_size as f32
} else {
1.0
};
let index_utilization = if self.index_buffer_size > 0 {
actual_index_bytes as f32 / self.index_buffer_size as f32
} else {
1.0
};
let utilization_below = vertex_utilization < BUFFER_SHRINK_THRESHOLD
|| index_utilization < BUFFER_SHRINK_THRESHOLD;
let reclaimable_bytes = self.vertex_buffer_size.saturating_sub(actual_vertex_bytes)
+ self.index_buffer_size.saturating_sub(actual_index_bytes);
let should_compact = utilization_below && reclaimable_bytes >= COMPACTION_MIN_RECLAIM_BYTES;
let did_compact = should_compact && actual_vertex_bytes > 0 && actual_index_bytes > 0;
if did_compact {
// Reset every mesh registry and the write offsets. The LOD id table keeps
// its length; only its entries are emptied.
self.meshes.clear();
self.mesh_data.clear();
self.mesh_names.clear();
self.mesh_bounds_data.clear();
self.mesh_lod_data.clear();
for entry in &mut self.mesh_lod_mesh_ids {
entry.clear();
}
self.current_vertex_offset = 0;
self.current_index_offset = 0;
self.current_morph_displacement_offset = 0;
// Re-register the built-in shapes first, in this fixed order.
self.add_mesh(device, queue, "Cube", create_cube_mesh());
self.add_mesh(device, queue, "Sphere", create_sphere_mesh(1.0, 16));
self.add_mesh(device, queue, "Sphere_LOD1", create_sphere_mesh(1.0, 8));
self.add_mesh(device, queue, "Sphere_LOD2", create_sphere_mesh(1.0, 4));
self.add_mesh(device, queue, "Plane", create_plane_mesh(2.0));
self.add_mesh(
device,
queue,
"SubdividedPlane",
create_subdivided_plane_mesh(2.0, 20),
);
self.add_mesh(
device,
queue,
"Cylinder",
create_cylinder_mesh(0.5, 1.0, 16),
);
self.add_mesh(device, queue, "Cone", create_cone_mesh(0.5, 1.0, 16));
self.add_mesh(device, queue, "Torus", create_torus_mesh(1.0, 0.3, 16, 16));
// Then every cached mesh whose name is not already registered.
for (name, mesh) in mesh_cache_iter(mesh_cache) {
if !self.meshes.contains_key(name) {
self.add_mesh(device, queue, name, mesh.clone());
}
}
// Bytes in use after re-adding, derived from the offsets that were reset to
// zero above; the target sizes add BUFFER_GROWTH_FACTOR headroom.
let final_vertex_bytes =
self.current_vertex_offset as u64 * std::mem::size_of::<Vertex>() as u64;
let final_index_bytes =
self.current_index_offset as u64 * std::mem::size_of::<u32>() as u64;
let new_vertex_size = (final_vertex_bytes as f32 * BUFFER_GROWTH_FACTOR).ceil() as u64;
let new_index_size = (final_index_bytes as f32 * BUFFER_GROWTH_FACTOR).ceil() as u64;
// Reallocate only when the target is strictly smaller than the current
// buffer. The first `final_vertex_bytes` of the old buffer are copied over.
// NOTE(review): this relies on `add_mesh` having placed the re-added data in
// the old buffer before the copy executes - confirm against `add_mesh`.
if new_vertex_size < self.vertex_buffer_size && final_vertex_bytes > 0 {
let new_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Mesh Vertex Buffer (Compacted)"),
size: new_vertex_size,
usage: wgpu::BufferUsages::VERTEX
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Mesh Vertex Buffer Compaction"),
});
encoder.copy_buffer_to_buffer(
&self.vertex_buffer,
0,
&new_buffer,
0,
final_vertex_bytes,
);
queue.submit(std::iter::once(encoder.finish()));
self.vertex_buffer = new_buffer;
self.vertex_buffer_size = new_vertex_size;
}
// Same procedure for the index buffer.
if new_index_size < self.index_buffer_size && final_index_bytes > 0 {
let new_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("Mesh Index Buffer (Compacted)"),
size: new_index_size,
usage: wgpu::BufferUsages::INDEX
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Mesh Index Buffer Compaction"),
});
encoder.copy_buffer_to_buffer(
&self.index_buffer,
0,
&new_buffer,
0,
final_index_bytes,
);
queue.submit(std::iter::once(encoder.finish()));
self.index_buffer = new_buffer;
self.index_buffer_size = new_index_size;
}
}
did_compact
}
}