use crate::prep_readback::{GpuReadbackData, ReadbackData};
use crate::{PhysicsState, RunState, Stage};
use slosh::rapier::na;
use slang_hal::backend::{Backend, WebGpu};
use slang_hal::BufferUsages;
#[cfg(feature = "webgpu")]
use slang_hal::GpuTimingResult;
use slosh::solver::{GpuParticleModelData, SimulationParams};
use stensor::tensor::GpuTensor;
/// Per-particle stride, in bytes, used when sizing and copying the `def_grad`
/// GPU buffer in 2D builds.
///
/// NOTE(review): 16 bytes presumably corresponds to a 2x2 matrix of `f32`;
/// confirm against the GPU-side layout.
#[cfg(feature = "dim2")]
pub const GPU_DEF_GRAD_STRIDE_BYTES: usize = 16;
/// Per-particle stride, in bytes, used when sizing and copying the `def_grad`
/// GPU buffer in 3D builds.
///
/// NOTE(review): 48 bytes presumably corresponds to a 3x3 matrix of `f32` whose
/// columns are padded to 16 bytes; confirm against the GPU-side layout.
#[cfg(feature = "dim3")]
pub const GPU_DEF_GRAD_STRIDE_BYTES: usize = 48;
/// The same per-particle stride expressed as a number of `f32` elements
/// (byte stride divided by the size of one `f32`).
pub const GPU_DEF_GRAD_STRIDE_F32: usize =
    GPU_DEF_GRAD_STRIDE_BYTES / std::mem::size_of::<f32>();
/// Wall-clock and GPU timings collected by `Stage::step_simulation` for one step.
///
/// The `f32` fields are filled from `elapsed().as_secs_f32() * 1000.0`, i.e. they
/// are expressed in milliseconds.
#[derive(Default)]
pub struct SimulationTimes {
/// Time from just before the sub-step count is chosen until the GPU
/// synchronization after submission returns. The user callbacks that run
/// earlier in the step and the staging-buffer readback are not included.
pub total_step_time: f32,
/// Time from the start of command encoding until the encoder has been
/// submitted (the submission call itself is included).
pub encoding_time: f32,
/// Time spent awaiting the three staging-buffer reads (instances, model data,
/// deformation gradients).
pub readback_time: f32,
/// Per-pass GPU timing results obtained from the timestamp queries; only
/// present when the `webgpu` feature is enabled.
#[cfg(feature = "webgpu")]
pub gpu_passes: Vec<GpuTimingResult>,
}
/// CPU-side copy of the data read back from the GPU by the latest simulation step.
#[derive(Default)]
pub struct SimulationStepResult {
/// One `ReadbackData` entry per particle, read from the readback instances
/// staging buffer. Resized whenever the particle count changes.
pub instances: Vec<ReadbackData>,
/// Timings measured during the step that produced this result.
pub timings: SimulationTimes,
/// Raw contents of the particle model buffer as `u32` words; sized to
/// `particle_count * size_of::<GpuModel>() / 4` elements.
pub model_data_raw: Vec<u32>,
/// Raw contents of the particle `def_grad` buffer as `f32` values; sized to
/// `particle_count * GPU_DEF_GRAD_STRIDE_F32` elements.
pub def_grad_raw: Vec<f32>,
}
/// Simulation stepping for a [`Stage`].
impl<GpuModel: GpuParticleModelData> Stage<GpuModel> {
/// Advances the simulation by one step and reads the results back to the CPU.
///
/// In order, this:
/// 1. returns `false` immediately if the run state is `RunState::Paused`;
/// 2. runs every registered physics callback;
/// 3. re-creates the readback/staging buffers and resizes the CPU-side result
///    vectors if the callbacks changed the particle count;
/// 4. selects the number of sub-steps (adaptive when
///    `min_num_substeps < max_num_substeps`, otherwise `max_num_substeps`) and
///    re-uploads the simulation parameters if that number changed;
/// 5. encodes all sub-steps, the readback shader and the copies of the model
///    and deformation-gradient buffers into their staging buffers, then submits
///    and synchronizes;
/// 6. reads the three staging buffers into `self.step_result`;
/// 7. steps the rapier pipeline;
/// 8. switches `RunState::Step` back to `RunState::Paused`, stores the timings
///    and increments `self.step_id`.
///
/// # Returns
///
/// `true` if a step was executed, `false` if the simulation is paused.
///
/// # Panics
///
/// Panics if any GPU operation fails (buffer creation, bounds computation,
/// buffer write, step launch, readback launch, submission, synchronization or
/// buffer read), since all of those results are unwrapped.
pub async fn step_simulation(&mut self) -> bool {
if self.app_state.run_state == RunState::Paused {
return false;
}
let physics = &mut self.physics;
// Remember the particle count so that changes made by callbacks are detected.
let prev_particle_count = physics.data.particles.len();
for callback in &mut physics.callbacks {
// Callbacks get mutable access to the physics data and read-only access to
// the results of the previous step.
let mut phx = PhysicsState {
backend: &self.gpu,
data: &mut physics.data,
results: &self.step_result,
step_id: self.step_id,
};
callback.update(&mut phx);
}
let new_particle_count = physics.data.particles.len();
if prev_particle_count != new_particle_count {
// The particle count changed: every per-particle readback buffer (GPU
// staging and CPU-side vectors) must be resized to match.
self.readback =
GpuReadbackData::new(&self.gpu, new_particle_count, self.render_mode, None)
.unwrap();
// NOTE(review): the integer division truncates if `size_of::<GpuModel>()`
// is not a multiple of 4 — presumably it always is; confirm.
let model_u32_count = new_particle_count * std::mem::size_of::<GpuModel>() / 4;
self.model_staging = GpuTensor::<u32, WebGpu>::vector_uninit(
&self.gpu,
// NOTE(review): `as u32` silently truncates for counts above `u32::MAX`.
model_u32_count as u32,
BufferUsages::COPY_DST | BufferUsages::MAP_READ,
)
.unwrap();
self.step_result
.instances
.resize(new_particle_count, ReadbackData::default());
let def_grad_f32_count = new_particle_count * GPU_DEF_GRAD_STRIDE_F32;
self.def_grad_staging = GpuTensor::<f32, WebGpu>::vector_uninit(
&self.gpu,
// NOTE(review): same truncating cast as for the model staging buffer.
def_grad_f32_count as u32,
BufferUsages::COPY_DST | BufferUsages::MAP_READ,
)
.unwrap();
self.step_result
.model_data_raw
.resize(model_u32_count, 0);
self.step_result
.def_grad_raw
.resize(def_grad_f32_count, 0.0);
println!("Adjust readback buffers: {}", new_particle_count);
}
// The total-step timer starts here, i.e. after the callbacks and the buffer
// re-allocation above.
let t_total = web_time::Instant::now();
let base_dt = physics.data.base_dt;
let prev_num_substeps = self.app_state.num_substeps;
if self.app_state.min_num_substeps < self.app_state.max_num_substeps {
// Adaptive sub-stepping: derive the sub-step count from the time-step bound
// computed on the GPU, then clamp it to the configured range.
let bounds = self
.app_state
.pipeline
.timestep_bounds
.compute_bounds(
&self.gpu,
&physics.data.grid,
&physics.data.particles,
&physics.data.timestep_bounds,
&mut physics.data.timestep_bounds_staging,
)
.await
.unwrap();
// NOTE(review): float-to-int `as` casts saturate (and map NaN to 0), so a zero
// or NaN `bounds` ends up at one end of the clamp range below rather than
// panicking — confirm that this is the intended fallback.
let num_substeps_estimated = (base_dt / bounds).ceil() as u32;
let num_substeps = num_substeps_estimated.clamp(
self.app_state.min_num_substeps,
self.app_state.max_num_substeps,
);
self.app_state.num_substeps = num_substeps;
} else if self.app_state.num_substeps != self.app_state.max_num_substeps {
// Fixed sub-stepping: always use `max_num_substeps`.
self.app_state.num_substeps = self.app_state.max_num_substeps;
}
if prev_num_substeps != self.app_state.num_substeps {
// The per-sub-step `dt` depends on the sub-step count, so the GPU-side
// parameters are re-uploaded only when that count changed.
let gravity = physics.data.gravity;
let params = SimulationParams {
gravity,
dt: base_dt / self.app_state.num_substeps as f32,
#[cfg(feature = "dim2")]
padding: 0.0,
};
println!("Updated GPU sim params to: {:?}", params);
let gpu_params = physics.data.sim_params.params.buffer_mut();
self.gpu.write_buffer(gpu_params, 0, &[params]).unwrap();
}
// The encoding timer covers command recording and the submission below.
let t_encoding = web_time::Instant::now();
let mut encoder = self.gpu.begin_encoding();
// Placeholder state handed to the hooks when no hooks state is configured.
let mut no_state = Box::new(());
let hooks_state = physics.hooks_state.as_deref_mut().unwrap_or(&mut no_state);
// NOTE(review): the capacity of `num_substeps * 10` presumably assumes at most
// 10 timed passes per sub-step — confirm against `launch_step`.
#[cfg(feature = "webgpu")]
let mut timestamps = slang_hal::GpuTimestamps::new(
self.gpu.device(),
self.gpu.queue(),
self.app_state.num_substeps * 10,
);
// Record every sub-step into the same encoder; nothing is submitted yet.
for _ in 0..self.app_state.num_substeps {
self.app_state
.pipeline
.launch_step(
&self.gpu,
&mut encoder,
&mut physics.data,
&mut *self.hooks,
hooks_state,
// GPU timestamps are only collected when the `webgpu` feature is enabled.
#[cfg(feature = "webgpu")]
Some(&mut timestamps),
#[cfg(not(feature = "webgpu"))]
None,
)
.await
.unwrap();
}
#[cfg(feature = "webgpu")]
timestamps.resolve(&mut encoder);
// Record the readback shader after all sub-steps.
self.readback_shader
.launch(
&self.gpu,
&mut encoder,
&mut self.readback,
&physics.data.sim_params,
&physics.data.grid,
&physics.data.particles,
)
.unwrap();
{
// Copy the whole particle model buffer into its mappable staging buffer.
let model_buf = physics.data.particles.models().buffer();
let staging_buf = self.model_staging.buffer();
let bytes = physics.data.particles.len() as u64 * std::mem::size_of::<GpuModel>() as u64;
wgpu::CommandEncoder::copy_buffer_to_buffer(
&mut encoder,
model_buf,
0,
staging_buf,
0,
bytes,
);
}
{
// Copy the whole `def_grad` buffer into its mappable staging buffer.
let def_grad_buf = physics.data.particles.def_grad.buffer();
let staging_buf = self.def_grad_staging.buffer();
let bytes =
physics.data.particles.len() as u64 * GPU_DEF_GRAD_STRIDE_BYTES as u64;
wgpu::CommandEncoder::copy_buffer_to_buffer(
&mut encoder,
def_grad_buf,
0,
staging_buf,
0,
bytes,
);
}
self.gpu.submit(encoder).unwrap();
// Durations are converted from seconds to milliseconds.
let t_encoding = t_encoding.elapsed().as_secs_f32() * 1000.0;
self.gpu.synchronize().unwrap();
// The total step time is taken right after synchronization, so it excludes
// the readback measured separately below.
let t_total_step = t_total.elapsed().as_secs_f32() * 1000.0;
let t_readback = web_time::Instant::now();
// Read the three staging buffers into the CPU-side result vectors, one after
// the other.
self.gpu
.read_buffer(
self.readback.instances_staging.buffer(),
self.step_result.instances.as_mut_slice(),
)
.await
.unwrap();
self.gpu
.read_buffer(
self.model_staging.buffer(),
self.step_result.model_data_raw.as_mut_slice(),
)
.await
.unwrap();
self.gpu
.read_buffer(
self.def_grad_staging.buffer(),
self.step_result.def_grad_raw.as_mut_slice(),
)
.await
.unwrap();
let t_readback = t_readback.elapsed().as_secs_f32() * 1000.0;
// Step the rapier pipeline, with `na::zero()` as its first argument and no
// hooks or event handler (`&()`).
// NOTE(review): the first argument is presumably rapier's gravity vector, so
// rigid bodies get no gravity from this call — confirm that this is intended.
let rapier = &mut self.physics.rapier_data;
rapier.physics_pipeline.step(
&na::zero(),
&rapier.params,
&mut rapier.islands,
&mut rapier.broad_phase,
&mut rapier.narrow_phase,
&mut rapier.bodies,
&mut rapier.colliders,
&mut rapier.impulse_joints,
&mut rapier.multibody_joints,
&mut rapier.ccd_solver,
&(),
&(),
);
// Single-step mode: pause again once this one step has run.
if self.app_state.run_state == RunState::Step {
self.app_state.run_state = RunState::Paused;
}
// If reading the timestamp results fails, fall back to the default value
// instead of panicking.
#[cfg(feature = "webgpu")]
let gpu_passes = timestamps.read_results(self.gpu.device()).await.unwrap_or_default();
self.step_result.timings = SimulationTimes {
total_step_time: t_total_step,
encoding_time: t_encoding,
readback_time: t_readback,
#[cfg(feature = "webgpu")]
gpu_passes,
};
self.step_id += 1;
true
}
}