use wgpu::util::DeviceExt;
use wgpu::TextureFormat;
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, bytemuck::Pod, bytemuck::Zeroable)]
pub struct Particle {
pub pos: [f32; 2],
pub vel: [f32; 2],
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct SimParams {
pub max_speed: f32,
pub sep_radius: f32,
pub align_radius: f32,
pub cohesion_radius: f32,
pub sep_weight: f32,
pub align_weight: f32,
pub cohesion_weight: f32,
}
impl Default for SimParams {
fn default() -> Self {
Self {
max_speed: 0.4,
sep_radius: 0.04,
align_radius: 0.10,
cohesion_radius: 0.10,
sep_weight: 1.5,
align_weight: 1.0,
cohesion_weight: 0.8,
}
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct DrawParams {
pub point_size: f32,
pub intensity: f32,
pub tint: [f32; 3],
pub speed_warm: f32,
}
impl Default for DrawParams {
fn default() -> Self {
Self { point_size: 6.0, intensity: 1.0, tint: [0.25, 0.55, 1.0], speed_warm: 2.0 }
}
}
pub fn step_cpu(particles: &[Particle], dt: f32, p: &SimParams) -> Vec<Particle> {
let n = particles.len();
let mut out = Vec::with_capacity(n);
for i in 0..n {
let me = particles[i];
let mut sep = [0.0f32; 2];
let mut align_sum = [0.0f32; 2];
let mut coh_sum = [0.0f32; 2];
let mut align_n = 0.0f32;
let mut coh_n = 0.0f32;
for (j, o) in particles.iter().enumerate() {
if j == i {
continue;
}
let d = [o.pos[0] - me.pos[0], o.pos[1] - me.pos[1]];
let dist = (d[0] * d[0] + d[1] * d[1]).sqrt();
if dist > 0.0 && dist < p.sep_radius {
let inv = 1.0 / (dist * dist);
sep[0] -= d[0] * inv;
sep[1] -= d[1] * inv;
}
if dist < p.align_radius {
align_sum[0] += o.vel[0];
align_sum[1] += o.vel[1];
align_n += 1.0;
}
if dist < p.cohesion_radius {
coh_sum[0] += o.pos[0];
coh_sum[1] += o.pos[1];
coh_n += 1.0;
}
}
let mut acc = [sep[0] * p.sep_weight, sep[1] * p.sep_weight];
if align_n > 0.0 {
acc[0] += (align_sum[0] / align_n - me.vel[0]) * p.align_weight;
acc[1] += (align_sum[1] / align_n - me.vel[1]) * p.align_weight;
}
if coh_n > 0.0 {
acc[0] += (coh_sum[0] / coh_n - me.pos[0]) * p.cohesion_weight;
acc[1] += (coh_sum[1] / coh_n - me.pos[1]) * p.cohesion_weight;
}
let mut vel = [me.vel[0] + acc[0] * dt, me.vel[1] + acc[1] * dt];
let sp = (vel[0] * vel[0] + vel[1] * vel[1]).sqrt();
if sp > p.max_speed {
let k = p.max_speed / sp;
vel[0] *= k;
vel[1] *= k;
}
let mut pos = [me.pos[0] + vel[0] * dt, me.pos[1] + vel[1] * dt];
pos[0] -= pos[0].floor();
pos[1] -= pos[1].floor();
out.push(Particle { pos, vel });
}
out
}
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
struct SimUniforms {
a: [f32; 4],
b: [f32; 4],
c: [f32; 4],
}
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
struct DrawUniforms {
a: [f32; 4],
tint: [f32; 4],
}
pub const PARTICLES_WGSL: &str = include_str!("particles.wgsl");
pub struct ParticleBuffers {
a: wgpu::Buffer,
b: wgpu::Buffer,
count: u32,
}
impl ParticleBuffers {
#[must_use]
pub fn count(&self) -> u32 {
self.count
}
}
pub struct GpuParticles {
step_pipeline: wgpu::ComputePipeline,
step_bgl: wgpu::BindGroupLayout,
draw_pipeline: wgpu::RenderPipeline,
draw_bgl: wgpu::BindGroupLayout,
sim_u: wgpu::Buffer,
draw_u: wgpu::Buffer,
target_format: TextureFormat,
}
impl GpuParticles {
pub fn new(device: &wgpu::Device, target_format: TextureFormat) -> Self {
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("l0_particles"),
source: wgpu::ShaderSource::Wgsl(PARTICLES_WGSL.into()),
});
let step_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("l0_particles_step_bgl"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, min_binding_size: None },
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None },
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, min_binding_size: None },
count: None,
},
],
});
let step_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("l0_particles_step"),
layout: Some(&device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("l0_particles_step_pll"),
bind_group_layouts: &[Some(&step_bgl)],
immediate_size: 0,
})),
module: &shader,
entry_point: Some("cs_step"),
compilation_options: Default::default(),
cache: None,
});
let draw_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("l0_particles_draw_bgl"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, min_binding_size: None },
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::VERTEX,
ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: false, min_binding_size: None },
count: None,
},
],
});
let blend = Some(wgpu::BlendState {
color: wgpu::BlendComponent { src_factor: wgpu::BlendFactor::One, dst_factor: wgpu::BlendFactor::One, operation: wgpu::BlendOperation::Add },
alpha: wgpu::BlendComponent { src_factor: wgpu::BlendFactor::One, dst_factor: wgpu::BlendFactor::One, operation: wgpu::BlendOperation::Add },
});
let draw_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
label: Some("l0_particles_draw"),
layout: Some(&device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("l0_particles_draw_pll"),
bind_group_layouts: &[Some(&draw_bgl)],
immediate_size: 0,
})),
vertex: wgpu::VertexState { module: &shader, entry_point: Some("pt_vs"), compilation_options: Default::default(), buffers: &[] },
primitive: wgpu::PrimitiveState { topology: wgpu::PrimitiveTopology::TriangleList, ..Default::default() },
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: Some("pt_fs"),
compilation_options: Default::default(),
targets: &[Some(wgpu::ColorTargetState { format: target_format, blend, write_mask: wgpu::ColorWrites::ALL })],
}),
multiview_mask: None,
cache: None,
});
let mkbuf = |label: &str| device.create_buffer(&wgpu::BufferDescriptor {
label: Some(label),
size: std::mem::size_of::<SimUniforms>().max(std::mem::size_of::<DrawUniforms>()) as u64,
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
Self {
step_pipeline,
step_bgl,
draw_pipeline,
draw_bgl,
sim_u: mkbuf("l0_particles_sim_u"),
draw_u: mkbuf("l0_particles_draw_u"),
target_format,
}
}
#[must_use]
pub fn target_format(&self) -> TextureFormat {
self.target_format
}
pub fn make_buffers(&self, device: &wgpu::Device, init: &[Particle]) -> ParticleBuffers {
let usage = wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::COPY_DST;
let a = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("l0_particles_a"),
contents: bytemuck::cast_slice(init),
usage,
});
let b = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("l0_particles_b"),
size: (std::mem::size_of::<Particle>() * init.len().max(1)) as u64,
usage,
mapped_at_creation: false,
});
ParticleBuffers { a, b, count: init.len() as u32 }
}
pub fn step(
&self,
device: &wgpu::Device,
queue: &wgpu::Queue,
encoder: &mut wgpu::CommandEncoder,
bufs: &mut ParticleBuffers,
dt: f32,
p: &SimParams,
) {
let u = SimUniforms {
a: [dt, bufs.count as f32, p.max_speed, p.sep_radius],
b: [p.align_radius, p.cohesion_radius, p.sep_weight, p.align_weight],
c: [p.cohesion_weight, 0.0, 0.0, 0.0],
};
queue.write_buffer(&self.sim_u, 0, bytemuck::bytes_of(&u));
let bind = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("l0_particles_step_bind"),
layout: &self.step_bgl,
entries: &[
wgpu::BindGroupEntry { binding: 0, resource: self.sim_u.as_entire_binding() },
wgpu::BindGroupEntry { binding: 1, resource: bufs.a.as_entire_binding() },
wgpu::BindGroupEntry { binding: 2, resource: bufs.b.as_entire_binding() },
],
});
{
let mut cp = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: Some("l0_particles_step_pass"), timestamp_writes: None });
cp.set_pipeline(&self.step_pipeline);
cp.set_bind_group(0, &bind, &[]);
cp.dispatch_workgroups(bufs.count.div_ceil(64), 1, 1);
}
std::mem::swap(&mut bufs.a, &mut bufs.b);
}
#[allow(clippy::too_many_arguments)]
pub fn render(
&self,
device: &wgpu::Device,
queue: &wgpu::Queue,
encoder: &mut wgpu::CommandEncoder,
bufs: &ParticleBuffers,
target: &wgpu::TextureView,
d: &DrawParams,
load: bool,
w: u32,
h: u32,
) {
let u = DrawUniforms {
a: [d.point_size, d.intensity, w.max(1) as f32, h.max(1) as f32],
tint: [d.tint[0], d.tint[1], d.tint[2], d.speed_warm],
};
queue.write_buffer(&self.draw_u, 0, bytemuck::bytes_of(&u));
let bind = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("l0_particles_draw_bind"),
layout: &self.draw_bgl,
entries: &[
wgpu::BindGroupEntry { binding: 0, resource: self.draw_u.as_entire_binding() },
wgpu::BindGroupEntry { binding: 1, resource: bufs.a.as_entire_binding() },
],
});
let load_op = if load { wgpu::LoadOp::Load } else { wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT) };
let mut rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("l0_particles_draw_pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: target,
resolve_target: None,
depth_slice: None,
ops: wgpu::Operations { load: load_op, store: wgpu::StoreOp::Store },
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
multiview_mask: None,
});
rp.set_pipeline(&self.draw_pipeline);
rp.set_bind_group(0, &bind, &[]);
rp.draw(0..6, 0..bufs.count);
}
}
#[cfg(test)]
mod tests {
use super::*;
fn ring(n: usize) -> Vec<Particle> {
(0..n)
.map(|i| {
let t = i as f32 / n as f32 * std::f32::consts::TAU;
Particle { pos: [0.5 + 0.08 * t.cos(), 0.5 + 0.08 * t.sin()], vel: [-0.1 * t.sin(), 0.1 * t.cos()] }
})
.collect()
}
#[test]
fn cpu_step_is_deterministic() {
let p = SimParams::default();
let init = ring(32);
let a = step_cpu(&init, 0.016, &p);
let b = step_cpu(&init, 0.016, &p);
assert_eq!(a.len(), init.len());
for (x, y) in a.iter().zip(&b) {
assert_eq!(x.pos[0].to_bits(), y.pos[0].to_bits(), "deterministic pos.x");
assert_eq!(x.vel[1].to_bits(), y.vel[1].to_bits(), "deterministic vel.y");
}
}
#[test]
fn step_keeps_bounds_and_clamps_speed() {
let p = SimParams::default();
let mut cur = ring(48);
for _ in 0..60 {
cur = step_cpu(&cur, 0.02, &p);
}
for q in &cur {
assert!((0.0..1.0).contains(&q.pos[0]) && (0.0..1.0).contains(&q.pos[1]), "in box: {:?}", q.pos);
let sp = (q.vel[0] * q.vel[0] + q.vel[1] * q.vel[1]).sqrt();
assert!(sp <= p.max_speed + 1e-4, "speed clamped: {sp}");
}
}
#[test]
fn separation_pushes_neighbours_apart() {
let p = SimParams { align_weight: 0.0, cohesion_weight: 0.0, ..SimParams::default() };
let init = vec![
Particle { pos: [0.50, 0.5], vel: [0.0, 0.0] },
Particle { pos: [0.51, 0.5], vel: [0.0, 0.0] },
];
let d0 = (init[1].pos[0] - init[0].pos[0]).abs();
let next = step_cpu(&init, 0.05, &p);
let d1 = (next[1].pos[0] - next[0].pos[0]).abs();
assert!(d1 > d0, "separation increased the gap ({d0} → {d1})");
}
fn compute_device() -> Option<(wgpu::Device, wgpu::Queue)> {
let instance = wgpu::Instance::default();
let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
power_preference: wgpu::PowerPreference::default(),
force_fallback_adapter: false,
compatible_surface: None,
}))
.ok()?;
if !adapter.get_downlevel_capabilities().flags.contains(wgpu::DownlevelFlags::COMPUTE_SHADERS) {
return None;
}
if adapter.limits().max_storage_buffers_per_shader_stage < 2 {
return None;
}
pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor {
label: Some("l0-particles-proof"),
required_features: wgpu::Features::empty(),
required_limits: adapter.limits(),
memory_hints: wgpu::MemoryHints::default(),
experimental_features: wgpu::ExperimentalFeatures::disabled(),
trace: wgpu::Trace::Off,
}))
.ok()
}
fn read_particles(device: &wgpu::Device, queue: &wgpu::Queue, buf: &wgpu::Buffer, n: u32) -> Vec<Particle> {
let size = (std::mem::size_of::<Particle>() as u32 * n) as u64;
let readback = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("l0-particles-readback"),
size,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
let mut enc = device.create_command_encoder(&Default::default());
enc.copy_buffer_to_buffer(buf, 0, &readback, 0, size);
queue.submit(Some(enc.finish()));
let slice = readback.slice(..);
let (tx, rx) = std::sync::mpsc::channel();
slice.map_async(wgpu::MapMode::Read, move |r| { let _ = tx.send(r); });
device.poll(wgpu::PollType::wait_indefinitely()).ok();
rx.recv().unwrap().unwrap();
let data = slice.get_mapped_range();
let out: Vec<Particle> = bytemuck::cast_slice(&data).to_vec();
drop(data);
readback.unmap();
out
}
#[test]
fn gpu_step_matches_cpu_reference() {
let Some((device, queue)) = compute_device() else {
eprintln!("[particles] no compute device — skipping GPU parity proof");
return;
};
let p = SimParams::default();
let init = ring(40);
let dt = 0.016f32;
let gp = GpuParticles::new(&device, TextureFormat::Rgba16Float);
let mut bufs = gp.make_buffers(&device, &init);
let mut enc = device.create_command_encoder(&Default::default());
gp.step(&device, &queue, &mut enc, &mut bufs, dt, &p);
queue.submit(Some(enc.finish()));
device.poll(wgpu::PollType::wait_indefinitely()).ok();
let gpu = read_particles(&device, &queue, &bufs.a, bufs.count);
let cpu = step_cpu(&init, dt, &p);
assert_eq!(gpu.len(), cpu.len());
let mut moved = false;
for (g, c) in gpu.iter().zip(&cpu) {
assert!((g.pos[0] - c.pos[0]).abs() < 1e-3, "pos.x parity: gpu {} vs cpu {}", g.pos[0], c.pos[0]);
assert!((g.pos[1] - c.pos[1]).abs() < 1e-3, "pos.y parity: gpu {} vs cpu {}", g.pos[1], c.pos[1]);
assert!((g.vel[0] - c.vel[0]).abs() < 1e-3, "vel.x parity");
if (g.pos[0] - 0.5).abs() > 1e-4 {
moved = true;
}
}
assert!(moved, "the step actually advanced particles");
}
#[test]
fn additive_render_accumulates_overlap() {
let Some((device, queue)) = compute_device() else {
eprintln!("[particles] no compute device — skipping additive render proof");
return;
};
let (w, h) = (64u32, 64u32);
let parts = vec![
Particle { pos: [0.25, 0.5], vel: [0.0, 0.0] },
Particle { pos: [0.75, 0.5], vel: [0.0, 0.0] },
Particle { pos: [0.75, 0.5], vel: [0.0, 0.0] },
];
let gp = GpuParticles::new(&device, TextureFormat::Rgba8Unorm);
let bufs = gp.make_buffers(&device, &parts);
let target = device.create_texture(&wgpu::TextureDescriptor {
label: Some("l0-particles-target"),
size: wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 },
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
view_formats: &[],
});
let view = target.create_view(&Default::default());
let draw = DrawParams { point_size: 10.0, intensity: 0.45, tint: [0.4, 0.6, 1.0], speed_warm: 0.0 };
let mut enc = device.create_command_encoder(&Default::default());
gp.render(&device, &queue, &mut enc, &bufs, &view, &draw, false, w, h);
let bpp = 4u32;
let unpadded = w * bpp;
let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
let padded = unpadded.div_ceil(align) * align;
let readback = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("l0-particles-px-readback"),
size: (padded * h) as u64,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
enc.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo { texture: &target, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All },
wgpu::TexelCopyBufferInfo { buffer: &readback, layout: wgpu::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(padded), rows_per_image: Some(h) } },
wgpu::Extent3d { width: w, height: h, depth_or_array_layers: 1 },
);
queue.submit(Some(enc.finish()));
let slice = readback.slice(..);
let (tx, rx) = std::sync::mpsc::channel();
slice.map_async(wgpu::MapMode::Read, move |r| { let _ = tx.send(r); });
device.poll(wgpu::PollType::wait_indefinitely()).ok();
rx.recv().unwrap().unwrap();
let data = slice.get_mapped_range();
let mut rgba = Vec::with_capacity((w * h * 4) as usize);
for row in 0..h {
let s = (row * padded) as usize;
rgba.extend_from_slice(&data[s..s + unpadded as usize]);
}
drop(data);
readback.unmap();
let at = |x: u32, y: u32| -> u32 {
let i = ((y * w + x) * 4) as usize;
rgba[i] as u32 + rgba[i + 1] as u32 + rgba[i + 2] as u32
};
let lit = rgba.chunks_exact(4).filter(|p| p[0] as u32 + p[1] as u32 + p[2] as u32 > 0).count();
assert!(lit > 0, "particles drew something ({lit} lit px)");
let single = at(w / 4, h / 2); let overlap = at(3 * w / 4, h / 2); assert!(single > 0, "single particle lit its centre ({single})");
assert!(overlap > single + 10, "two stacked particles add brighter ({overlap}) than one ({single})");
}
#[test]
fn shader_entry_points_and_uniform_sizes() {
assert!(PARTICLES_WGSL.contains("fn cs_step"));
assert!(PARTICLES_WGSL.contains("fn pt_vs"));
assert!(PARTICLES_WGSL.contains("fn pt_fs"));
assert!(PARTICLES_WGSL.contains("@workgroup_size(64)"));
assert_eq!(std::mem::size_of::<Particle>(), 16);
assert_eq!(std::mem::size_of::<SimUniforms>(), 48);
assert_eq!(std::mem::size_of::<DrawUniforms>(), 32);
}
}