use crate::grid::prefix_sum::{PrefixSumWorkspace, WgPrefixSum};
use crate::grid::sort::WgSort;
use crate::solver::{GpuParticleModelData, GpuParticles, GpuRigidParticles, ParticlePosition};
use bytemuck::{Pod, Zeroable};
use encase::ShaderType;
use crate::math::Point;
use slang_hal::backend::Backend;
use slang_hal::function::GpuFunction;
use slang_hal::{BufferUsages, Shader, ShaderArgs};
use std::sync::Arc;
use stensor::tensor::{GpuScalar, GpuVector};
/// GPU entry points for resetting and preparing the sparse grid.
///
/// Loaded from the `slosh::grid::grid` slang module; each field is one kernel
/// dispatched by [`WgGrid::launch_sort`].
#[derive(Shader)]
#[shader(module = "slosh::grid::grid")]
pub struct WgGrid<B: Backend> {
    // Clears the block hash map; dispatched with `hmap_capacity` workgroups.
    reset_hmap: GpuFunction<B>,
    // Resets grid node state; dispatched indirectly via `indirect_n_g2p_p2g_groups`.
    reset: GpuFunction<B>,
    // Fills the indirect-dispatch workgroup-count buffers; single-workgroup launch.
    init_indirect_workgroups: GpuFunction<B>,
}
/// Bind-group arguments shared by every kernel dispatched in
/// [`WgGrid::launch_sort`]; all references borrow from the grid/particle
/// structures for the duration of one pass.
#[derive(ShaderArgs)]
struct GridArgs<'a, B: Backend> {
    // Grid metadata (capacities, cell width, active-block count).
    grid: &'a GpuScalar<GpuGridMetadata, B>,
    // Block hash map: virtual block id -> physical block index.
    hmap_entries: &'a GpuVector<GpuGridHashMapEntry, B>,
    // Indirect workgroup counts for per-active-block dispatches.
    n_block_groups: &'a GpuVector<[u32; 3], B>,
    // Indirect workgroup counts for g2p/p2g-style dispatches.
    n_g2p_p2g_groups: &'a GpuVector<[u32; 3], B>,
    // Per-node simulation state.
    nodes: &'a GpuVector<GpuGridNode, B>,
    nodes_linked_lists: &'a GpuVector<[u32; 2], B>,
    rigid_nodes_linked_lists: &'a GpuVector<[u32; 2], B>,
    // Scratch values consumed by the prefix-sum pass.
    scan_values: &'a GpuVector<u32, B>,
    particles_pos: &'a GpuVector<ParticlePosition, B>,
    // Particle count as read by kernels (GPU-side copy of the length).
    particles_len: &'a GpuScalar<u32, B>,
    active_blocks: &'a GpuVector<GpuActiveBlockHeader, B>,
    // Sample points on rigid bodies used to mark blocks they overlap.
    rigid_particles_pos: &'a GpuVector<Point<f32>, B>,
    rigid_particle_needs_block: &'a GpuVector<u32, B>,
    // Output of the sort: particle ids ordered by block.
    sorted_particle_ids: &'a GpuVector<u32, B>,
    particle_node_linked_lists: &'a GpuVector<u32, B>,
}
impl<B: Backend> WgGrid<B> {
    /// Records the full particle-sort pipeline into `pass`:
    ///
    /// 1. Clear the block hash map, then let every particle touch (register)
    ///    the block it lives in.
    /// 2. Compute the indirect-dispatch workgroup counts.
    /// 3. Count particles per block, prefix-sum the counts into per-block
    ///    start offsets, and write those offsets back.
    /// 4. Reset the grid nodes, then scatter each particle id into its
    ///    block's slice of `sorted_particle_ids`.
    ///
    /// # Errors
    /// Propagates the first backend error raised while recording a dispatch.
    pub fn launch_sort<'a, GpuModel: GpuParticleModelData>(
        &'a self,
        backend: &B,
        pass: &mut B::Pass,
        particles: &GpuParticles<B, GpuModel>,
        rigid_particles: &GpuRigidParticles<B>,
        grid: &GpuGrid<B>,
        prefix_sum: &mut PrefixSumWorkspace<B>,
        sort_module: &'a WgSort<B>,
        prefix_sum_module: &'a WgPrefixSum<B>,
    ) -> Result<(), B::Error> {
        // Shared bind-group arguments for every kernel dispatched below.
        let args = GridArgs {
            grid: &grid.meta,
            hmap_entries: &grid.hmap_entries,
            n_block_groups: &grid.indirect_n_blocks_groups,
            n_g2p_p2g_groups: &grid.indirect_n_g2p_p2g_groups,
            nodes: &grid.nodes,
            nodes_linked_lists: &grid.nodes_linked_lists,
            rigid_nodes_linked_lists: &grid.rigid_nodes_linked_lists,
            scan_values: &grid.scan_values,
            particles_pos: particles.positions(),
            particles_len: particles.gpu_len(),
            active_blocks: &grid.active_blocks,
            rigid_particles_pos: &rigid_particles.sample_points,
            rigid_particle_needs_block: &rigid_particles.rigid_particle_needs_block,
            sorted_particle_ids: particles.sorted_ids(),
            particle_node_linked_lists: particles.node_linked_lists(),
        };

        // FIX: the two dispatches below were wrapped in a
        // `while !sparse_grid_has_the_correct_size` loop whose flag was set to
        // `true` unconditionally at the end of the body, so the loop always ran
        // exactly once. The misleading dead control flow is removed; behavior
        // is unchanged.
        // TODO: if hash-map resizing on overflow is ever implemented, re-run
        // these two dispatches after growing `hmap_entries`.
        self.reset_hmap
            .launch(backend, pass, &args, [grid.cpu_meta.hmap_capacity, 1, 1])?;
        sort_module.touch_particle_blocks.launch(
            backend,
            pass,
            &args,
            [particles.len() as u32, 1, 1],
        )?;

        // Derive the workgroup counts used by all subsequent indirect dispatches.
        self.init_indirect_workgroups
            .launch_grid(backend, pass, &args, [1, 1, 1])?;

        // Per-block particle counts -> prefix sum -> per-block start offsets.
        sort_module.update_block_particle_count.launch(
            backend,
            pass,
            &args,
            [particles.len() as u32, 1, 1],
        )?;
        sort_module
            .copy_particles_len_to_scan_value
            .launch_indirect(backend, pass, &args, grid.indirect_n_blocks_groups.buffer())?;
        prefix_sum_module.launch(backend, pass, prefix_sum, &grid.scan_values)?;
        sort_module
            .copy_scan_values_to_first_particles
            .launch_indirect(backend, pass, &args, grid.indirect_n_blocks_groups.buffer())?;

        // Reset node state, then place every particle id at its sorted position.
        self.reset.launch_indirect(
            backend,
            pass,
            &args,
            grid.indirect_n_g2p_p2g_groups.buffer(),
        )?;
        sort_module.finalize_particles_sort.launch(
            backend,
            pass,
            &args,
            [particles.len() as u32, 1, 1],
        )?;
        Ok(())
    }
}
/// Header describing the sparse grid's sizes; kept both on the CPU
/// (`GpuGrid::cpu_meta`) and on the GPU (`GpuGrid::meta`).
///
/// `#[repr(C)]` + `Pod`: the field order and sizes must match the
/// shader-side layout exactly.
#[derive(Copy, Clone, PartialEq, Pod, Zeroable)]
#[repr(C)]
pub struct GpuGridMetadata {
    // Number of blocks currently active; initialized to 0 in `with_capacity`
    // (presumably updated by GPU kernels — the CPU copy is not refreshed here).
    num_active_blocks: u32,
    // Edge length of one grid cell.
    cell_width: f32,
    // Slot count of the block hash map; set equal to `capacity` in `with_capacity`.
    hmap_capacity: u32,
    // Maximum number of blocks the buffers can hold (rounded to a power of two).
    capacity: u32,
}
/// Per-node (grid cell) simulation state stored on the GPU.
///
/// Uses `encase::ShaderType` (not `Pod`) so the layout follows shader
/// alignment rules.
#[derive(Copy, Clone, PartialEq, ShaderType)]
#[repr(C)]
pub struct GpuGridNode {
    // xyz: momentum or velocity (stage-dependent — TODO confirm against the
    // shaders), w: accumulated mass.
    momentum_velocity_mass: nalgebra::Vector4<f32>,
    // Same packing for the CDF-incompatible contribution — verify semantics
    // against the slang module.
    momentum_velocity_mass_incompatible: nalgebra::Vector4<f32>,
    // Rigid-body CDF data for this node.
    cdf: GpuGridNodeCdf,
}
/// Integer coordinates identifying a grid block in unbounded "virtual" space;
/// used as the key of the block hash map.
#[derive(Copy, Clone, PartialEq, Pod, Zeroable)]
#[repr(C)]
pub struct BlockVirtualId {
    #[cfg(feature = "dim2")]
    id: nalgebra::Vector2<i32>,
    // NOTE(review): 3D uses a Vector4 — presumably the 4th lane is padding for
    // 16-byte GPU alignment; confirm against the shader-side struct.
    #[cfg(feature = "dim3")]
    id: nalgebra::Vector4<i32>, }
/// One slot of the GPU-side block hash map (`BlockVirtualId` -> `value`).
///
/// The dimension-dependent `pad0`/`pad1` fields keep `key` and the overall
/// struct size aligned with the shader layout — TODO confirm against the
/// slang module.
#[derive(Copy, Clone, PartialEq, Pod, Zeroable)]
#[repr(C)]
pub struct GpuGridHashMapEntry {
    // Slot state; `Default` sets it to `u32::MAX` (presumably "free/empty" —
    // verify against the shader's probing code).
    state: u32,
    #[cfg(feature = "dim2")]
    pad0: u32,
    #[cfg(feature = "dim3")]
    pad0: nalgebra::Vector3<u32>,
    // Virtual block coordinates this slot maps.
    key: BlockVirtualId,
    // Payload (presumably the physical block index — TODO confirm).
    value: u32,
    #[cfg(feature = "dim2")]
    pad1: u32,
    #[cfg(feature = "dim3")]
    pad1: nalgebra::Vector3<u32>,
}
impl Default for GpuGridHashMapEntry {
fn default() -> Self {
Self {
state: u32::MAX,
pad0: Default::default(),
key: BlockVirtualId::zeroed(),
value: 0,
pad1: Default::default(),
}
}
}
/// Header for one active block, filled during the sort pipeline.
#[derive(Copy, Clone, PartialEq, Pod, Zeroable)]
#[repr(C)]
pub struct GpuActiveBlockHeader {
    // Virtual coordinates of this block.
    virtual_id: BlockVirtualId,
    // Index of the block's first particle in the sorted particle-id array —
    // presumably produced by the prefix-sum stage; confirm against the shaders.
    first_particle: u32,
    // Number of particles that fall inside this block.
    num_particles: u32,
}
/// Per-node CDF (rigid-body distance-field) data.
#[derive(Copy, Clone, PartialEq, Default, Debug, ShaderType)]
#[repr(C)]
pub struct GpuGridNodeCdf {
    // Distance to the closest rigid body — sign convention not visible here;
    // TODO confirm against the shader.
    pub distance: f32,
    // Packed affinity bits (presumably per-rigid-body affinity/sign flags —
    // verify against the slang module).
    pub affinities: u32,
    // Identifier of the closest rigid body/particle — TODO confirm which.
    pub closest_id: u32,
}
/// All GPU buffers backing the sparse simulation grid.
///
/// The metadata and hash map are double-buffered (`meta`/`prev_meta`,
/// `hmap_entries`/`prev_hmap_entries`) and exchanged by `swap_buffers`.
pub struct GpuGrid<B: Backend> {
    // CPU-side copy of the metadata as written at construction time; not
    // automatically kept in sync with the GPU copy.
    pub cpu_meta: GpuGridMetadata,
    pub meta: GpuScalar<GpuGridMetadata, B>,
    pub prev_meta: GpuScalar<GpuGridMetadata, B>,
    pub hmap_entries: GpuVector<GpuGridHashMapEntry, B>,
    pub prev_hmap_entries: GpuVector<GpuGridHashMapEntry, B>,
    // Node storage: `capacity * 64` nodes (see `with_capacity`).
    pub nodes: GpuVector<GpuGridNode, B>,
    pub active_blocks: GpuVector<GpuActiveBlockHeader, B>,
    // Scratch buffer for the prefix-sum over per-block particle counts.
    pub scan_values: GpuVector<u32, B>,
    pub nodes_linked_lists: GpuVector<[u32; 2], B>,
    pub rigid_nodes_linked_lists: GpuVector<[u32; 2], B>,
    // Indirect-dispatch workgroup counts; `Arc`-shared so sibling modules can
    // hold the same buffers.
    pub indirect_n_blocks_groups: Arc<GpuScalar<[u32; 3], B>>,
    pub indirect_n_g2p_p2g_groups: Arc<GpuScalar<[u32; 3], B>>,
    // Small scratch buffer for debugging kernels.
    pub debug: GpuVector<u32, B>,
}
impl<B: Backend> GpuGrid<B> {
    /// Allocates every GPU buffer for a grid able to hold `capacity` blocks
    /// (rounded up to the next power of two) of cells `cell_width` wide.
    ///
    /// # Errors
    /// Propagates any backend error raised while creating a buffer.
    pub fn with_capacity(backend: &B, capacity: u32, cell_width: f32) -> Result<Self, B::Error> {
        // Fixed number of nodes stored per block.
        const NODES_PER_BLOCK: u32 = 64;

        // Power-of-two block capacity; the hash map uses the same slot count.
        let capacity = capacity.next_power_of_two();
        let node_capacity = capacity * NODES_PER_BLOCK;

        let cpu_meta = GpuGridMetadata {
            num_active_blocks: 0,
            cell_width,
            hmap_capacity: capacity,
            capacity,
        };

        // Metadata is double-buffered; both copies start identical.
        let meta_usages = BufferUsages::STORAGE | BufferUsages::COPY_SRC;
        let meta = GpuScalar::scalar(backend, cpu_meta, meta_usages)?;
        let prev_meta = GpuScalar::scalar(backend, cpu_meta, meta_usages)?;

        // Both hash-map buffers start with every slot in the default (empty) state.
        let empty_slots = vec![GpuGridHashMapEntry::default(); capacity as usize];
        let prev_hmap_entries = GpuVector::vector(backend, &empty_slots, BufferUsages::STORAGE)?;
        let hmap_entries = GpuVector::vector(backend, &empty_slots, BufferUsages::STORAGE)?;

        // Node-granularity buffers.
        let nodes =
            GpuVector::vector_uninit_encased(backend, node_capacity, BufferUsages::STORAGE)?;
        let nodes_linked_lists =
            GpuVector::vector_uninit(backend, node_capacity, BufferUsages::STORAGE)?;
        let rigid_nodes_linked_lists =
            GpuVector::vector_uninit(backend, node_capacity, BufferUsages::STORAGE)?;

        // Block-granularity buffers.
        let active_blocks = GpuVector::vector_uninit(backend, capacity, BufferUsages::STORAGE)?;
        let scan_values = GpuVector::vector_uninit(backend, capacity, BufferUsages::STORAGE)?;

        // Indirect-dispatch workgroup counts, `Arc`-shared with sibling modules.
        let indirect_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        let indirect_n_blocks_groups =
            Arc::new(GpuVector::scalar_uninit(backend, indirect_usages)?);
        let indirect_n_g2p_p2g_groups =
            Arc::new(GpuVector::scalar_uninit(backend, indirect_usages)?);

        // Small scratch buffer for debugging kernels.
        let debug = GpuVector::vector(backend, [0, 0], BufferUsages::STORAGE)?;

        Ok(Self {
            cpu_meta,
            meta,
            prev_meta,
            hmap_entries,
            prev_hmap_entries,
            nodes,
            nodes_linked_lists,
            rigid_nodes_linked_lists,
            active_blocks,
            scan_values,
            indirect_n_blocks_groups,
            indirect_n_g2p_p2g_groups,
            debug,
        })
    }

    /// Exchanges the current and previous metadata + hash-map buffers, making
    /// this step's grid the "previous" one for the next step.
    pub fn swap_buffers(&mut self) {
        std::mem::swap(&mut self.meta, &mut self.prev_meta);
        std::mem::swap(&mut self.hmap_entries, &mut self.prev_hmap_entries);
    }
}