use bevy_app::{App, Plugin};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
entity::{Entity, EntityHashMap},
query::{Has, With},
schedule::IntoSystemConfigs as _,
system::{Query, Res, ResMut, Resource, StaticSystemParam},
world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bytemuck::{Pod, Zeroable};
use nonmax::NonMaxU32;
use smallvec::smallvec;
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
use crate::{
render_phase::{
BinnedPhaseItem, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem,
PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
ViewBinnedRenderPhases, ViewSortedRenderPhases,
},
render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
renderer::{RenderAdapter, RenderDevice, RenderQueue},
view::{ExtractedView, GpuCulling, ViewTarget},
Render, RenderApp, RenderSet,
};
use super::{BatchMeta, GetBatchData, GetFullBatchData};
/// Plugin that adds GPU-driven batching support to the render app: manages
/// the indirect draw parameters buffer and the preprocessing-support probe.
pub struct BatchingPlugin;
impl Plugin for BatchingPlugin {
    fn build(&self, app: &mut App) {
        // Everything this plugin does lives on the render sub-app; if it's
        // absent (e.g. headless without rendering), there's nothing to set up.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app
                .insert_resource(IndirectParametersBuffer::new())
                .add_systems(
                    Render,
                    write_indirect_parameters_buffer
                        .in_set(RenderSet::PrepareResourcesFlush),
                );
        }
    }

    fn finish(&self, app: &mut App) {
        // `GpuPreprocessingSupport::from_world` queries the render adapter and
        // device, which only exist after renderer initialization — hence this
        // runs in `finish`, not `build`.
        if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
            render_app.init_resource::<GpuPreprocessingSupport>();
        }
    }
}
/// How much GPU preprocessing the current platform supports, as detected by
/// the [`FromWorld`] implementation below.
#[derive(Clone, Copy, PartialEq, Resource)]
pub enum GpuPreprocessingSupport {
    /// No GPU preprocessing at all (no usable compute workgroups, or a
    /// blocklisted Android adapter).
    None,
    /// GPU preprocessing is available, but not GPU culling (the device lacks
    /// the indirect-draw first-instance capabilities culling requires).
    PreprocessingOnly,
    /// Full support, including GPU culling via indirect draws.
    Culling,
}
/// The set of buffers used to batch instances for GPU preprocessing.
///
/// `BD` is the per-instance output data type; `BDI` is the CPU-side input
/// data type consumed by the preprocessing shader.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod,
{
    /// Output buffer of per-instance `BD` values. It is an
    /// [`UninitBufferVec`]: the CPU only reserves slots (see `add()` in the
    /// batching systems); the contents are produced on the GPU.
    pub data_buffer: UninitBufferVec<BD>,
    /// One work-item buffer per view entity, created lazily and pruned by
    /// [`delete_old_work_item_buffers`].
    pub work_item_buffers: EntityHashMap<PreprocessWorkItemBuffer>,
    /// CPU-filled input buffer of `BDI` for the current frame.
    pub current_input_buffer: RawBufferVec<BDI>,
    /// Input buffer retained from the previous frame. Not re-uploaded by
    /// [`write_batched_instance_buffers`]; presumably kept for shaders that
    /// need last frame's data — confirm against the preprocessing pipeline.
    pub previous_input_buffer: RawBufferVec<BDI>,
}
/// The per-view list of preprocessing work items, plus whether the view used
/// GPU culling when the buffer was created.
pub struct PreprocessWorkItemBuffer {
    /// One [`PreprocessWorkItem`] per instance to be preprocessed.
    pub buffer: BufferVec<PreprocessWorkItem>,
    /// True if the view had the `GpuCulling` component at creation time.
    pub gpu_culling: bool,
}
/// One unit of work for the GPU preprocessing shader: process the instance at
/// `input_index` and direct its result to `output_index`.
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// Index of the instance in the input buffer.
    pub input_index: u32,
    /// Destination index. When GPU culling is off this is a slot in the
    /// output data buffer; when culling is on it is instead the index of the
    /// batch's [`IndirectParameters`] entry (see the batching systems below).
    pub output_index: u32,
}
/// GPU-side arguments for one indirect draw call.
///
/// The field names cover both the indexed and the non-indexed indirect draw
/// argument layouts; which interpretation applies depends on the draw that
/// consumes this struct (hence the `_or_` names).
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParameters {
    /// Vertex count (non-indexed draws) or index count (indexed draws).
    pub vertex_or_index_count: u32,
    /// Number of instances to draw.
    pub instance_count: u32,
    /// First vertex (non-indexed) or first index (indexed).
    pub first_vertex_or_first_index: u32,
    /// Base vertex (indexed) or first instance (non-indexed).
    pub base_vertex_or_first_instance: u32,
    /// First instance — only meaningful for the indexed layout, where it is
    /// the fifth word.
    pub first_instance: u32,
}
/// The frame's buffer of [`IndirectParameters`], one entry per indirect
/// batch. Filled on the CPU during batching, uploaded and reset each frame by
/// [`write_indirect_parameters_buffer`].
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffer(pub BufferVec<IndirectParameters>);
impl IndirectParametersBuffer {
    /// Creates an empty buffer with the usages indirect drawing requires:
    /// `STORAGE` so shaders can write instance counts into it, and `INDIRECT`
    /// so draw calls can read their arguments from it.
    pub fn new() -> IndirectParametersBuffer {
        let usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        IndirectParametersBuffer(BufferVec::new(usages))
    }
}
impl Default for IndirectParametersBuffer {
fn default() -> Self {
Self::new()
}
}
impl FromWorld for GpuPreprocessingSupport {
    /// Probes the render adapter and device to decide how much GPU
    /// preprocessing this platform can do.
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Blocklist check for Android adapters with known-bad behavior.
        // NOTE(review): the condition below reads as "Adreno models 730 and
        // below, except 720, are unsupported" — confirm against the driver
        // issue this was added for.
        fn is_non_supported_android_device(adapter: &RenderAdapter) -> bool {
            if cfg!(target_os = "android") {
                let adapter_name = adapter.get_info().name;
                let non_supported_adreno_model = |model: &str| -> bool {
                    // Parse only the leading digits of the model suffix;
                    // `map_while` stops at the first non-digit character.
                    let model = model
                        .chars()
                        .map_while(|c| c.to_digit(10))
                        .fold(0, |acc, digit| acc * 10 + digit);
                    model != 720 && model <= 730
                };
                adapter_name
                    .strip_prefix("Adreno (TM) ")
                    .is_some_and(non_supported_adreno_model)
            } else {
                false
            }
        }

        // A zero max compute workgroup size means compute shaders are
        // unusable, so no preprocessing is possible at all.
        if device.limits().max_compute_workgroup_size_x == 0 || is_non_supported_android_device(adapter)
        {
            GpuPreprocessingSupport::None
        } else if !device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE) ||
            !adapter.get_downlevel_capabilities().flags.contains(
                DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW)
        {
            // GPU culling needs indirect draws that honor a per-draw first
            // instance; without those we can still preprocess, not cull.
            GpuPreprocessingSupport::PreprocessingOnly
        } else {
            GpuPreprocessingSupport::Culling
        }
    }
}
impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod,
{
    /// Creates the full set of batching buffers, all empty and all with
    /// `STORAGE` usage so the preprocessing shader can access them.
    pub fn new() -> Self {
        let storage = BufferUsages::STORAGE;
        BatchedInstanceBuffers {
            data_buffer: UninitBufferVec::new(storage),
            work_item_buffers: EntityHashMap::default(),
            current_input_buffer: RawBufferVec::new(storage),
            previous_input_buffer: RawBufferVec::new(storage),
        }
    }

    /// Returns the instance output buffer as an entire-buffer binding
    /// resource, or `None` if it hasn't been allocated on the GPU yet.
    pub fn instance_data_binding(&self) -> Option<BindingResource> {
        let buffer = self.data_buffer.buffer()?;
        Some(buffer.as_entire_binding())
    }

    /// Empties every buffer at the start of a frame. The per-view work-item
    /// buffers are cleared rather than dropped so their entries (and GPU
    /// allocations) can be reused frame to frame.
    pub fn clear(&mut self) {
        self.data_buffer.clear();
        self.current_input_buffer.clear();
        self.previous_input_buffer.clear();
        self.work_item_buffers
            .values_mut()
            .for_each(|work_items| work_items.buffer.clear());
    }
}
impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
BD: GpuArrayBufferable + Sync + Send + 'static,
BDI: Pod,
{
fn default() -> Self {
Self::new()
}
}
/// Bookkeeping for a batch being accumulated by
/// [`batch_and_prepare_sorted_render_phase`].
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Index into `phase.items` of the first item in this batch; that item
    /// receives the batch's range when the batch is flushed.
    phase_item_start_index: u32,
    /// Index in the output data buffer of the batch's first instance.
    instance_start_index: u32,
    /// The batch's slot in the indirect parameters buffer, when GPU culling
    /// is in use.
    indirect_parameters_index: Option<NonMaxU32>,
    /// Comparison metadata: a later item may join this batch only if its own
    /// metadata is `Some` and equal to this. `None` means the batch cannot be
    /// extended.
    meta: Option<BatchMeta<F::CompareData>>,
}
impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Closes out this batch: writes the final instance range
    /// (`instance_start_index..instance_end_index`) and the optional
    /// indirect-parameters extra index back onto the phase item that started
    /// the batch, which then represents every merged instance at draw time.
    fn flush<I>(self, instance_end_index: u32, phase: &mut SortedRenderPhase<I>)
    where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index =
            PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index);
    }
}
/// System that empties the GPU batching buffers at the start of a frame.
///
/// The resource is optional because it only exists when GPU preprocessing is
/// in use; when it's absent this system is a no-op.
pub fn clear_batched_gpu_instance_buffers<GFBD>(
    gpu_batched_instance_buffers: Option<
        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    let Some(mut buffers) = gpu_batched_instance_buffers else {
        return;
    };
    buffers.clear();
}
/// System that drops the per-view work-item buffers of views whose
/// [`ViewTarget`] entity no longer exists, so stale views don't accumulate
/// GPU allocations.
pub fn delete_old_work_item_buffers<GFBD>(
    mut gpu_batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    view_targets: Query<Entity, With<ViewTarget>>,
) where
    GFBD: GetFullBatchData,
{
    // Keep only the entries whose view entity still has a `ViewTarget`.
    gpu_batched_instance_buffers
        .work_item_buffers
        .retain(|&view_entity, _work_items| view_targets.contains(view_entity));
}
/// Creates batches for a sorted render phase and emits the GPU preprocessing
/// work items for each view. This only runs when GPU preprocessing is in use.
///
/// Consecutive phase items whose [`BatchMeta`] compares equal are merged into
/// one batch; one [`PreprocessWorkItem`] is pushed per instance so the
/// preprocessing shader knows which input index maps to which output slot
/// (or which indirect-parameters slot, when GPU culling is enabled).
///
/// Fixes relative to the previous revision: the redundant recomputation of
/// `item`/`entity` mid-loop is removed (both are `Copy` and unchanged since
/// the top of the iteration), and the needless `mut` on the read-only `views`
/// query is dropped.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    let BatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ..
    } = gpu_array_buffer.into_inner();

    for (view, gpu_culling) in &views {
        let Some(phase) = sorted_render_phases.get_mut(&view) else {
            continue;
        };

        // Create the work item buffer for this view on first use.
        let work_item_buffer =
            work_item_buffers
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
                    gpu_culling,
                });

        // The batch currently being accumulated, if any.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            let item = &phase.items[current_index];
            let entity = (item.entity(), item.main_entity());
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);

            // An item with no input data this frame closes out the current
            // batch and is skipped.
            let Some((current_input_index, current_meta)) = current_batch_input_index else {
                if let Some(batch) = batch.take() {
                    batch.flush(data_buffer.len() as u32, phase);
                }
                continue;
            };
            let current_meta =
                current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));

            // Two items may share a batch only when both sides have
            // comparison metadata and it matches exactly.
            let can_batch = batch.as_ref().is_some_and(|batch| {
                match (&current_meta, &batch.meta) {
                    (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
                    (_, _) => false,
                }
            });

            // Reserve this instance's slot in the output data buffer.
            let output_index = data_buffer.add() as u32;

            if !can_batch {
                // Flush the old batch (ending just before this slot) and
                // start a new one at this item.
                if let Some(batch) = batch.take() {
                    batch.flush(output_index, phase);
                }

                let indirect_parameters_index = if gpu_culling {
                    GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
                        entity,
                        output_index,
                    )
                } else {
                    None
                };
                batch = Some(SortedRenderBatch {
                    phase_item_start_index: current_index as u32,
                    instance_start_index: output_index,
                    indirect_parameters_index,
                    meta: current_meta,
                });
            }

            // Emit the work item mapping this instance's input data to its
            // destination: the indirect-parameters slot under GPU culling,
            // otherwise the plain output slot.
            if let Some(batch) = batch.as_ref() {
                work_item_buffer.buffer.push(PreprocessWorkItem {
                    input_index: current_input_index.into(),
                    output_index: match batch.indirect_parameters_index {
                        Some(indirect_parameters_index) => indirect_parameters_index.into(),
                        None => output_index,
                    },
                });
            }
        }

        // Flush the final batch, if any.
        if let Some(batch) = batch.take() {
            batch.flush(data_buffer.len() as u32, phase);
        }
    }
}
/// Creates batches for a binned render phase and emits the GPU preprocessing
/// work items for each view. This only runs when GPU preprocessing is in use.
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();
    let BatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ..
    } = gpu_array_buffer.into_inner();
    for (view, gpu_culling) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&view) else {
            continue;
        };
        // Create the work item buffer for this view on first use.
        let work_item_buffer =
            work_item_buffers
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
                    gpu_culling,
                });
        // Batchable bins: every entity in a bin shares a single batch.
        for key in &phase.batchable_mesh_keys {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for &(entity, main_entity) in &phase.batchable_mesh_values[key] {
                // Skip entities with no per-instance input data this frame.
                let Some(input_index) =
                    GFBD::get_binned_index(&system_param_item, (entity, main_entity))
                else {
                    continue;
                };
                // Reserve this instance's slot in the output data buffer.
                let output_index = data_buffer.add() as u32;
                match batch {
                    Some(ref mut batch) => {
                        // Extend the open batch by one instance.
                        batch.instance_range.end = output_index + 1;
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            // Under GPU culling, work items target the
                            // batch's indirect-parameters slot; otherwise the
                            // plain output slot.
                            output_index: batch
                                .extra_index
                                .as_indirect_parameters_index()
                                .unwrap_or(output_index),
                        });
                    }
                    None if gpu_culling => {
                        // Open a new batch that will draw indirectly.
                        let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                            &system_param_item,
                            &mut indirect_parameters_buffer,
                            (entity, main_entity),
                            output_index,
                        );
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            // NOTE(review): `unwrap_or_default()` falls back
                            // to slot 0 when no indirect-parameters index was
                            // allocated — confirm that is the intended
                            // degenerate behavior.
                            output_index: indirect_parameters_index.unwrap_or_default().into(),
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (entity, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
                                indirect_parameters_index,
                            ),
                        });
                    }
                    None => {
                        // Open a new direct-draw batch.
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index,
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (entity, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                    }
                }
            }
            if let Some(batch) = batch {
                phase.batch_sets.push(smallvec![batch]);
            }
        }
        // Unbatchable bins: each entity gets its own instance indices.
        for key in &phase.unbatchable_mesh_keys {
            let unbatchables = phase.unbatchable_mesh_values.get_mut(key).unwrap();
            for &(entity, main_entity) in &unbatchables.entities {
                let Some(input_index) =
                    GFBD::get_binned_index(&system_param_item, (entity, main_entity))
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;
                if gpu_culling {
                    let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
                        (entity, main_entity),
                        output_index,
                    )
                    .unwrap_or_default();
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index: indirect_parameters_index.into(),
                    });
                    // Record where this entity's instance data and indirect
                    // parameters live so draw functions can find them.
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: indirect_parameters_index.into(),
                            extra_index: PhaseItemExtraIndex::indirect_parameters_index(
                                indirect_parameters_index.into(),
                            ),
                        });
                } else {
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index,
                    });
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                }
            }
        }
    }
}
/// System that uploads the CPU-side batching buffers to the GPU once batch
/// building has finished.
///
/// The previous-frame input buffer is deliberately left untouched here; it is
/// only retained, not rewritten, each frame.
pub fn write_batched_instance_buffers<GFBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
) where
    GFBD: GetFullBatchData,
{
    let buffers = gpu_array_buffer.into_inner();

    // The output buffer holds no CPU data (it's an `UninitBufferVec`), so
    // this only ensures the GPU allocation is large enough; its contents are
    // produced by the preprocessing shader.
    buffers.data_buffer.write_buffer(&render_device);
    buffers
        .current_input_buffer
        .write_buffer(&render_device, &render_queue);
    for work_item_buffer in buffers.work_item_buffers.values_mut() {
        work_item_buffer
            .buffer
            .write_buffer(&render_device, &render_queue);
    }
}
/// System that uploads this frame's indirect draw parameters to the GPU and
/// then clears the CPU-side staging data for the next frame.
pub fn write_indirect_parameters_buffer(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
) {
    let staging = &mut indirect_parameters_buffer.0;
    staging.write_buffer(&render_device, &render_queue);
    // Clearing only discards CPU data; the GPU buffer keeps what was written.
    staging.clear();
}