#![allow(clippy::ptr_arg)]
#![allow(dead_code)]
/// Usage tag attached to a `ComputeBuffer` when it is constructed.
///
/// In the code visible in this file the tag is only stored and compared for
/// equality; no function changes its behaviour based on the variant.
/// NOTE(review): the variant names presumably mirror the buffer roles of a
/// real GPU API — confirm against the backend this module stands in for.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BufferUsage {
Vertex,
Index,
Uniform,
Storage,
Staging,
}
/// A labelled, CPU-resident buffer of `f32` elements.
///
/// Offsets and counts used by the accessors are measured in `f32` elements,
/// not in bytes.
#[derive(Debug, Clone)]
pub struct ComputeBuffer {
    /// Element storage; zero-filled by [`ComputeBuffer::new`].
    pub data: Vec<f32>,
    /// Usage tag supplied at construction; not interpreted by this type.
    pub usage: BufferUsage,
    /// Human-readable name of the buffer.
    pub label: String,
}

impl ComputeBuffer {
    /// Creates a buffer holding `size` elements, all initialised to `0.0`.
    pub fn new(size: usize, usage: BufferUsage, label: &str) -> Self {
        Self {
            data: vec![0.0_f32; size],
            usage,
            label: label.to_owned(),
        }
    }

    /// Copies `values` into the buffer starting at element `offset`.
    ///
    /// # Panics
    ///
    /// Panics with an "out-of-bounds write" message when
    /// `offset + values.len()` exceeds the buffer length, including the case
    /// where that sum does not fit in a `usize`.
    pub fn write_f32(&mut self, offset: usize, values: &[f32]) {
        // `checked_add` keeps a huge offset from wrapping around (release) or
        // tripping an unrelated arithmetic-overflow panic (debug).
        let end = match offset.checked_add(values.len()) {
            Some(end) if end <= self.data.len() => end,
            _ => panic!(
                "write_f32: out-of-bounds write (offset={offset}, len={}, capacity={})",
                values.len(),
                self.data.len()
            ),
        };
        self.data[offset..end].copy_from_slice(values);
    }

    /// Returns a copy of `count` elements starting at element `offset`.
    ///
    /// # Panics
    ///
    /// Panics with an "out-of-bounds read" message when `offset + count`
    /// exceeds the buffer length, including the case where that sum does not
    /// fit in a `usize`.
    pub fn read_f32(&self, offset: usize, count: usize) -> Vec<f32> {
        let end = match offset.checked_add(count) {
            Some(end) if end <= self.data.len() => end,
            _ => panic!(
                "read_f32: out-of-bounds read (offset={offset}, count={count}, capacity={})",
                self.data.len()
            ),
        };
        self.data[offset..end].to_vec()
    }

    /// Size of the stored elements in bytes (`len * size_of::<f32>()`).
    pub fn byte_size(&self) -> usize {
        std::mem::size_of_val(self.data.as_slice())
    }
}
/// Extent of a compute workgroup along the x, y and z axes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WorkgroupSize {
    pub x: u32,
    pub y: u32,
    pub z: u32,
}

impl WorkgroupSize {
    /// Number of workgroups of size `workgroup` needed to cover `total`
    /// items, i.e. `total / workgroup` rounded up.
    ///
    /// # Panics
    ///
    /// Panics if `workgroup` is zero.
    pub fn dispatch_count(total: u32, workgroup: u32) -> u32 {
        if workgroup == 0 {
            panic!("workgroup size must be > 0");
        }
        let full_groups = total / workgroup;
        let has_partial_group = total % workgroup != 0;
        // A remainder implies `workgroup >= 2`, so the increment cannot overflow.
        full_groups + u32::from(has_partial_group)
    }
}

impl Default for WorkgroupSize {
    /// A one-dimensional workgroup of 64 invocations (`64 x 1 x 1`).
    fn default() -> Self {
        let (x, y, z) = (64, 1, 1);
        Self { x, y, z }
    }
}
/// Names the compute kernel that a dispatch or pass refers to.
///
/// The value is a plain tag: nothing visible in this file selects an
/// implementation from it automatically. `Custom` carries a caller-chosen
/// kernel name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ComputeKernelKind {
VelocityUpdate,
PressureJacobi,
ParticleForce,
NeighborSearch,
Custom(String),
}
/// CPU implementations of the velocity-update, pressure-Jacobi and
/// particle-force kernels.
///
/// `kernel` and `workgroup_size` are stored as metadata only; none of the
/// dispatch methods reads them.
pub struct CpuComputeDispatch {
    pub kernel: ComputeKernelKind,
    pub workgroup_size: WorkgroupSize,
}

impl CpuComputeDispatch {
    /// Bundles a kernel tag with the workgroup size it should be run with.
    pub fn new(kernel: ComputeKernelKind, wg: WorkgroupSize) -> Self {
        Self {
            kernel,
            workgroup_size: wg,
        }
    }

    /// Advances the first `n` entries of `pos` and `vel` by one time step.
    ///
    /// For each entry the position is moved with the velocity from *before*
    /// this call (`pos += vel * dt`), then the velocity is updated with
    /// `vel += force / mass * dt`. A zero mass is not special-cased and yields
    /// the usual IEEE infinity/NaN.
    ///
    /// # Panics
    ///
    /// Panics if any of the four buffers holds fewer than `n` elements. The
    /// check happens before anything is written, so a failed call leaves
    /// `pos` and `vel` untouched.
    pub fn dispatch_velocity_update(
        &self,
        pos: &mut ComputeBuffer,
        vel: &mut ComputeBuffer,
        force: &ComputeBuffer,
        mass: &ComputeBuffer,
        dt: f32,
        n: usize,
    ) {
        // Slice everything up front: one bounds check per buffer, and no
        // partially-updated state if one of them is too short.
        let pos = &mut pos.data[..n];
        let vel = &mut vel.data[..n];
        let force = &force.data[..n];
        let mass = &mass.data[..n];

        let bodies = pos
            .iter_mut()
            .zip(vel.iter_mut())
            .zip(force.iter().zip(mass.iter()));
        for ((p, v), (f, m)) in bodies {
            *p += *v * dt;
            *v += f / m * dt;
        }
    }

    /// Performs one Jacobi sweep of the 5-point Poisson stencil.
    ///
    /// The grid is `nx` by `ny`, stored row-major (`idx = j * nx + i`). Every
    /// interior point of `p` is set to
    /// `(sum of the four neighbours in p_old - dx^2 * rhs) / 4`; boundary
    /// points of `p` are left as they are. Grids without interior points
    /// (`nx < 3` or `ny < 3`, including zero) are a no-op.
    ///
    /// # Panics
    ///
    /// Panics if a buffer is too small for the interior of the given grid.
    pub fn dispatch_pressure_jacobi(
        &self,
        p: &mut ComputeBuffer,
        p_old: &ComputeBuffer,
        rhs: &ComputeBuffer,
        nx: usize,
        ny: usize,
        dx: f32,
    ) {
        let dx2 = dx * dx;
        // `saturating_sub` keeps a zero dimension from underflowing `usize`.
        for j in 1..ny.saturating_sub(1) {
            for i in 1..nx.saturating_sub(1) {
                let idx = j * nx + i;
                p.data[idx] = (p_old.data[idx + 1]
                    + p_old.data[idx - 1]
                    + p_old.data[idx + nx]
                    + p_old.data[idx - nx]
                    - dx2 * rhs.data[idx])
                    / 4.0;
            }
        }
    }

    /// Computes pairwise Lennard-Jones (12-6) forces for `n` 2-D particles.
    ///
    /// `pos` and `force` are interleaved as `[x0, y0, x1, y1, ...]`. The first
    /// `2 * n` entries of `force` are reset to zero, then every unordered
    /// pair contributes equal and opposite forces. Pairs whose squared
    /// distance is below `1e-12` are skipped to avoid dividing by ~zero.
    ///
    /// # Panics
    ///
    /// Panics if `force` holds fewer than `2 * n` elements, or if `pos` is too
    /// short for a pair that gets evaluated.
    pub fn dispatch_particle_force(
        &self,
        pos: &ComputeBuffer,
        force: &mut ComputeBuffer,
        eps: f32,
        sigma: f32,
        n: usize,
    ) {
        force.data[..2 * n].fill(0.0);
        for i in 0..n {
            for j in (i + 1)..n {
                let dx = pos.data[2 * j] - pos.data[2 * i];
                let dy = pos.data[2 * j + 1] - pos.data[2 * i + 1];
                let r2 = dx * dx + dy * dy;
                if r2 < 1e-12 {
                    continue;
                }
                let sr2 = (sigma * sigma) / r2;
                let sr6 = sr2 * sr2 * sr2;
                let sr12 = sr6 * sr6;
                // Force magnitude divided by r, so multiplying by (dx, dy)
                // gives the force vector directly.
                let fmag = 24.0 * eps / r2 * (2.0 * sr12 - sr6);
                force.data[2 * i] -= fmag * dx;
                force.data[2 * i + 1] -= fmag * dy;
                force.data[2 * j] += fmag * dx;
                force.data[2 * j + 1] += fmag * dy;
            }
        }
    }
}
/// Running totals collected across kernel dispatches.
#[derive(Debug, Clone, Default)]
pub struct GpuStats {
    /// Number of dispatches recorded so far.
    pub dispatch_count: u64,
    /// Sum of the byte counts passed to [`GpuStats::record_dispatch`].
    pub bytes_transferred: u64,
    /// Sum of the kernel times, in milliseconds, passed to
    /// [`GpuStats::record_dispatch`].
    pub kernel_time_ms: f64,
}

impl GpuStats {
    /// Starts with every counter at zero.
    pub fn new() -> Self {
        Self {
            dispatch_count: 0,
            bytes_transferred: 0,
            kernel_time_ms: 0.0,
        }
    }

    /// Counts one more dispatch and adds its byte count and duration to the
    /// running totals.
    pub fn record_dispatch(&mut self, bytes: u64, time_ms: f64) {
        let Self {
            dispatch_count,
            bytes_transferred,
            kernel_time_ms,
        } = self;
        *dispatch_count += 1;
        *bytes_transferred += bytes;
        *kernel_time_ms += time_ms;
    }
}
/// Performs one Jacobi sweep of the 5-point Poisson stencil on an
/// `nx` by `ny` row-major grid (`idx = j * nx + i`).
///
/// Every interior point of `p_new` is set to
/// `(sum of the four neighbours in p_old - dx^2 * rhs) / 4`. Boundary points
/// of `p_new` are not written. Grids without interior points (`nx < 3` or
/// `ny < 3`, including zero-sized grids) are a no-op.
///
/// # Panics
///
/// Panics if a slice is too short for the interior of the given grid.
pub fn jacobi_step_2d(
    p_new: &mut Vec<f32>,
    p_old: &[f32],
    rhs: &[f32],
    nx: usize,
    ny: usize,
    dx: f32,
) {
    let dx2 = dx * dx;
    // `saturating_sub` keeps a zero dimension from underflowing `usize`.
    for j in 1..ny.saturating_sub(1) {
        let row = j * nx;
        for i in 1..nx.saturating_sub(1) {
            let idx = row + i;
            let neighbours = p_old[idx + 1] + p_old[idx - 1] + p_old[idx + nx] + p_old[idx - nx];
            p_new[idx] = (neighbours - dx2 * rhs[idx]) / 4.0;
        }
    }
}
pub fn pressure_poisson_solve(
p: &mut Vec<f32>,
rhs: &[f32],
nx: usize,
ny: usize,
dx: f32,
n_iter: usize,
) -> f32 {
let size = nx * ny;
let mut p_old = p.clone();
for _ in 0..n_iter {
jacobi_step_2d(p, &p_old, rhs, nx, ny, dx);
p_old.copy_from_slice(&p[..size]);
}
let dx2 = dx * dx;
let mut residual = 0.0_f32;
for j in 1..ny - 1 {
for i in 1..nx - 1 {
let idx = j * nx + i;
let lap = (p[idx + 1] + p[idx - 1] + p[idx + nx] + p[idx - nx] - 4.0 * p[idx]) / dx2;
let r = (lap - rhs[idx]).abs();
if r > residual {
residual = r;
}
}
}
residual
}
/// A bounded, insertion-ordered cache of compute pipelines keyed by name.
///
/// When the cache is full the oldest inserted entry is evicted first (FIFO).
/// Looking an entry up with [`PipelineCache::get`] does not change its
/// position; re-inserting an existing key does move it to the newest slot.
pub struct PipelineCache {
    /// Maximum number of entries kept at once.
    capacity: usize,
    /// Entries ordered from oldest (front) to newest (back).
    entries: Vec<(String, CpuComputeDispatch)>,
}

impl PipelineCache {
    /// Creates an empty cache that keeps at most `capacity` entries.
    ///
    /// A capacity of zero is allowed and yields a cache that never stores
    /// anything.
    pub fn new(capacity: usize) -> Self {
        Self {
            capacity,
            entries: Vec::new(),
        }
    }

    /// Stores `pipeline` under `key`, replacing any entry with the same key
    /// and evicting the oldest entries if the cache would exceed its
    /// capacity.
    ///
    /// With a capacity of zero the pipeline is dropped and the cache stays
    /// empty.
    pub fn insert(&mut self, key: &str, pipeline: CpuComputeDispatch) {
        self.entries.retain(|(k, _)| k != key);

        // A zero-capacity cache has no slot to free; trying to evict from an
        // empty list would panic.
        if self.capacity == 0 {
            return;
        }

        // Make room for exactly one new entry, dropping the oldest first.
        if self.entries.len() >= self.capacity {
            let excess = self.entries.len() + 1 - self.capacity;
            self.entries.drain(..excess);
        }
        self.entries.push((key.to_owned(), pipeline));
    }

    /// Returns the pipeline stored under `key`, if any.
    pub fn get(&self, key: &str) -> Option<&CpuComputeDispatch> {
        self.entries.iter().find(|(k, _)| k == key).map(|(_, v)| v)
    }

    /// Number of entries currently stored.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// `true` when no entries are stored.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Removes every entry; the capacity is unchanged.
    pub fn clear(&mut self) {
        self.entries.clear();
    }
}
/// Aggregate counters for dispatches and pipeline-cache lookups.
#[derive(Debug, Clone, Default)]
pub struct PipelineStats {
    /// Number of calls to [`PipelineStats::record_dispatch`].
    pub total_dispatches: u64,
    /// Sum of the workgroup counts of all recorded dispatches.
    pub total_workgroups: u64,
    /// Sum over all recorded dispatches of workgroups times invocations per
    /// workgroup.
    pub total_invocations: u64,
    /// Cache lookups that found an entry; maintained by the caller.
    pub cache_hits: u64,
    /// Cache lookups that found nothing; maintained by the caller.
    pub cache_misses: u64,
}

impl PipelineStats {
    /// Records one dispatch of `num_workgroups` workgroups, each of extent
    /// `wg_size`.
    pub fn record_dispatch(&mut self, num_workgroups: u64, wg_size: WorkgroupSize) {
        // Multiply in x, y, z order starting from the workgroup count.
        let mut invocations = num_workgroups;
        for extent in [wg_size.x, wg_size.y, wg_size.z].iter() {
            invocations *= u64::from(*extent);
        }

        self.total_dispatches += 1;
        self.total_workgroups += num_workgroups;
        self.total_invocations += invocations;
    }

    /// Fraction of cache lookups that were hits, in `[0, 1]`.
    ///
    /// Returns `NaN` when no lookups have been counted yet.
    pub fn cache_hit_ratio(&self) -> f64 {
        match self.cache_hits + self.cache_misses {
            0 => f64::NAN,
            lookups => self.cache_hits as f64 / lookups as f64,
        }
    }
}
/// Description of a single compute pass inside a `MultiPassPipeline`.
///
/// This is plain data; nothing in this type executes the pass.
#[derive(Debug, Clone)]
pub struct ComputePass {
// Name of the pass; used as the prefix of validation error messages.
pub label: String,
// Which kernel this pass refers to.
pub kernel: ComputeKernelKind,
// Workgroup extent associated with this pass.
pub workgroup_size: WorkgroupSize,
// Indices into the buffer slice handed to `validate_resource_bindings`.
pub buffer_bindings: Vec<usize>,
}
/// An ordered list of compute passes sharing one label.
#[derive(Debug)]
pub struct MultiPassPipeline {
    /// Name of the pipeline.
    pub label: String,
    /// Passes in the order they were added.
    pub passes: Vec<ComputePass>,
}

impl MultiPassPipeline {
    /// Creates a pipeline called `label` with no passes.
    pub fn new(label: &str) -> Self {
        let label = String::from(label);
        let passes = Vec::new();
        Self { label, passes }
    }

    /// Appends `pass` after all previously added passes.
    pub fn add_pass(&mut self, pass: ComputePass) {
        let passes = &mut self.passes;
        passes.push(pass);
    }

    /// Number of passes added so far.
    pub fn num_passes(&self) -> usize {
        let Self { passes, .. } = self;
        passes.len()
    }
}
/// Checks the buffer bindings of `pass` against the available `buffers`.
///
/// Returns one message per problem, in binding order:
/// - a binding index that is not a valid index into `buffers`;
/// - a binding index that already appeared earlier in the same pass.
///
/// An index can trigger both messages. An empty vector means the pass is
/// valid.
pub fn validate_resource_bindings(pass: &ComputePass, buffers: &[ComputeBuffer]) -> Vec<String> {
    let available = buffers.len();
    let mut already_bound = std::collections::HashSet::new();
    let mut problems = Vec::new();

    for binding in pass.buffer_bindings.iter().copied() {
        let in_range = binding < available;
        if !in_range {
            problems.push(format!(
                "Pass '{}': buffer binding {} is out of range (have {} buffers)",
                pass.label, binding, available
            ));
        }

        // `insert` returns false when the index was already present.
        let first_use = already_bound.insert(binding);
        if !first_use {
            problems.push(format!(
                "Pass '{}': Duplicate buffer binding {}",
                pass.label, binding
            ));
        }
    }

    problems
}
/// Validates every pass of `pipeline` against `buffers`.
///
/// The messages of all passes are concatenated in pass order; an empty
/// vector means no pass reported a problem.
pub fn validate_pipeline(pipeline: &MultiPassPipeline, buffers: &[ComputeBuffer]) -> Vec<String> {
    pipeline
        .passes
        .iter()
        .flat_map(|pass| validate_resource_bindings(pass, buffers))
        .collect()
}
/// Performs one relaxed sweep of the 5-point Poisson stencil on an
/// `nx` by `ny` row-major grid (`idx = j * nx + i`).
///
/// For every interior point the plain Jacobi value
/// `(sum of the four neighbours - dx^2 * rhs) / 4` is blended with the old
/// value: `p = (1 - omega) * p_old + omega * jacobi`. With `omega == 1.0`
/// this is exactly a Jacobi step.
///
/// Note that all neighbour values are read from `p_old`, never from the
/// freshly written `p`, so despite the name this is a weighted (relaxed)
/// Jacobi sweep rather than an in-place successive over-relaxation.
///
/// Boundary points of `p` are not written. Grids without interior points
/// (`nx < 3` or `ny < 3`, including zero-sized grids) are a no-op.
///
/// # Panics
///
/// Panics if a slice is too short for the interior of the given grid.
pub fn sor_step_2d(
    p: &mut Vec<f32>,
    p_old: &[f32],
    rhs: &[f32],
    nx: usize,
    ny: usize,
    dx: f32,
    omega: f32,
) {
    let dx2 = dx * dx;
    // `saturating_sub` keeps a zero dimension from underflowing `usize`.
    for j in 1..ny.saturating_sub(1) {
        for i in 1..nx.saturating_sub(1) {
            let idx = j * nx + i;
            let relaxed = (p_old[idx + 1] + p_old[idx - 1] + p_old[idx + nx] + p_old[idx - nx]
                - dx2 * rhs[idx])
                / 4.0;
            p[idx] = (1.0 - omega) * p_old[idx] + omega * relaxed;
        }
    }
}
/// Performs one in-place red-black Gauss-Seidel sweep of the 5-point Poisson
/// stencil on an `nx` by `ny` row-major grid (`idx = j * nx + i`).
///
/// Interior points with even `i + j` are updated first, then those with odd
/// `i + j`. Each update sets the point to
/// `(sum of its four neighbours - dx^2 * rhs) / 4` using the current contents
/// of `p`, so the second colour sees the values written by the first.
///
/// Boundary points are not written. Grids without interior points
/// (`nx < 3` or `ny < 3`, including zero-sized grids) are a no-op.
///
/// # Panics
///
/// Panics if a slice is too short for the interior of the given grid.
pub fn red_black_gauss_seidel_step(p: &mut Vec<f32>, rhs: &[f32], nx: usize, ny: usize, dx: f32) {
    let dx2 = dx * dx;
    // `saturating_sub` keeps a zero dimension from underflowing `usize`.
    let last_row = ny.saturating_sub(1);
    let last_col = nx.saturating_sub(1);

    // Colour 0 (even i + j) first, then colour 1 (odd i + j).
    for parity in 0..2 {
        for j in 1..last_row {
            for i in 1..last_col {
                if (i + j) % 2 != parity {
                    continue;
                }
                let idx = j * nx + i;
                p[idx] =
                    (p[idx + 1] + p[idx - 1] + p[idx + nx] + p[idx - nx] - dx2 * rhs[idx]) / 4.0;
            }
        }
    }
}
/// Returns the max-norm residual of the discrete Poisson equation
/// `laplacian(p) = rhs` over the interior of an `nx` by `ny` row-major grid.
///
/// The Laplacian is the 5-point stencil
/// `(sum of four neighbours - 4 * p) / dx^2`, and the result is the largest
/// `|laplacian - rhs|` found. NaN residuals never compare greater and are
/// therefore ignored. A grid without interior points (`nx < 3` or `ny < 3`,
/// including zero-sized grids) yields `0.0`.
///
/// # Panics
///
/// Panics if a slice is too short for the interior of the given grid.
pub fn compute_linf_residual(p: &[f32], rhs: &[f32], nx: usize, ny: usize, dx: f32) -> f32 {
    let dx2 = dx * dx;
    let mut worst = 0.0_f32;
    // `saturating_sub` keeps a zero dimension from underflowing `usize`.
    for j in 1..ny.saturating_sub(1) {
        for i in 1..nx.saturating_sub(1) {
            let idx = j * nx + i;
            let lap = (p[idx + 1] + p[idx - 1] + p[idx + nx] + p[idx - nx] - 4.0 * p[idx]) / dx2;
            let r = (lap - rhs[idx]).abs();
            if r > worst {
                worst = r;
            }
        }
    }
    worst
}
/// Brute-force neighbour search over `n` 2-D points.
///
/// `positions` is interleaved as `[x0, y0, x1, y1, ...]`. Two points are
/// neighbours when their squared distance is strictly less than `cutoff^2`.
/// The result has one list per point holding the indices of its neighbours;
/// a point is never listed as its own neighbour.
///
/// # Panics
///
/// Panics if `positions` is too short for a pair that gets evaluated.
pub fn dispatch_neighbor_search(positions: &[f32], n: usize, cutoff: f32) -> Vec<Vec<usize>> {
    let limit_sq = cutoff * cutoff;
    // Looks up the (x, y) coordinates of point `k`.
    let point = |k: usize| (positions[2 * k], positions[2 * k + 1]);

    let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); n];
    for a in 0..n {
        // Each unordered pair is visited once and recorded on both sides.
        for b in (a + 1)..n {
            let (bx, by) = point(b);
            let (ax, ay) = point(a);
            let (dx, dy) = (bx - ax, by - ay);
            let dist_sq = dx * dx + dy * dy;
            if dist_sq < limit_sq {
                adjacency[a].push(b);
                adjacency[b].push(a);
            }
        }
    }
    adjacency
}
// Unit tests for every public item defined above. Each test builds its own
// small fixtures; none of them share state.
#[cfg(test)]
mod tests {
use super::*;
// ---- BufferUsage ----
#[test]
fn buffer_usage_eq() {
assert_eq!(BufferUsage::Storage, BufferUsage::Storage);
assert_ne!(BufferUsage::Vertex, BufferUsage::Index);
}
#[test]
fn buffer_usage_clone() {
let u = BufferUsage::Uniform;
assert_eq!(u.clone(), BufferUsage::Uniform);
}
// ---- ComputeBuffer construction and element access ----
#[test]
fn compute_buffer_new_zeroed() {
let buf = ComputeBuffer::new(8, BufferUsage::Storage, "test");
assert_eq!(buf.data.len(), 8);
assert!(buf.data.iter().all(|&v| v == 0.0));
assert_eq!(buf.label, "test");
}
#[test]
fn compute_buffer_byte_size() {
let buf = ComputeBuffer::new(4, BufferUsage::Uniform, "u");
assert_eq!(buf.byte_size(), 16);
}
#[test]
fn compute_buffer_write_read_roundtrip() {
let mut buf = ComputeBuffer::new(8, BufferUsage::Storage, "rw");
buf.write_f32(2, &[1.0, 2.0, 3.0]);
let out = buf.read_f32(2, 3);
assert_eq!(out, vec![1.0, 2.0, 3.0]);
}
#[test]
fn compute_buffer_write_at_offset_zero() {
let mut buf = ComputeBuffer::new(4, BufferUsage::Storage, "s");
buf.write_f32(0, &[9.0, 8.0, 7.0, 6.0]);
assert_eq!(buf.data, vec![9.0, 8.0, 7.0, 6.0]);
}
// Writing 2 elements at offset 3 of a 4-element buffer ends at 5 > 4.
#[test]
#[should_panic(expected = "out-of-bounds write")]
fn compute_buffer_write_oob_panics() {
let mut buf = ComputeBuffer::new(4, BufferUsage::Storage, "oob");
buf.write_f32(3, &[1.0, 2.0]); }
// Reading 2 elements at offset 3 of a 4-element buffer ends at 5 > 4.
#[test]
#[should_panic(expected = "out-of-bounds read")]
fn compute_buffer_read_oob_panics() {
let buf = ComputeBuffer::new(4, BufferUsage::Storage, "oob");
let _ = buf.read_f32(3, 2);
}
// ---- WorkgroupSize ----
#[test]
fn workgroup_dispatch_count_exact() {
assert_eq!(WorkgroupSize::dispatch_count(64, 64), 1);
}
#[test]
fn workgroup_dispatch_count_ceil() {
assert_eq!(WorkgroupSize::dispatch_count(65, 64), 2);
assert_eq!(WorkgroupSize::dispatch_count(1, 64), 1);
}
#[test]
fn workgroup_dispatch_count_zero_total() {
assert_eq!(WorkgroupSize::dispatch_count(0, 64), 0);
}
#[test]
fn workgroup_default() {
let wg = WorkgroupSize::default();
assert_eq!(wg.x, 64);
assert_eq!(wg.y, 1);
assert_eq!(wg.z, 1);
}
// ---- ComputeKernelKind ----
#[test]
fn kernel_kind_custom_eq() {
let a = ComputeKernelKind::Custom("foo".into());
let b = ComputeKernelKind::Custom("foo".into());
assert_eq!(a, b);
}
#[test]
fn kernel_kind_variants_neq() {
assert_ne!(
ComputeKernelKind::VelocityUpdate,
ComputeKernelKind::PressureJacobi
);
}
// ---- CpuComputeDispatch::dispatch_velocity_update ----
// Positions advance with the velocity from before the call; velocities gain
// force / mass * dt.
#[test]
fn velocity_update_basic() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, WorkgroupSize::default());
let n = 3;
let mut pos = ComputeBuffer::new(n, BufferUsage::Storage, "pos");
let mut vel = ComputeBuffer::new(n, BufferUsage::Storage, "vel");
let mut force = ComputeBuffer::new(n, BufferUsage::Storage, "force");
let mut mass = ComputeBuffer::new(n, BufferUsage::Storage, "mass");
pos.write_f32(0, &[0.0, 1.0, 2.0]);
vel.write_f32(0, &[1.0, 0.5, -1.0]);
force.write_f32(0, &[0.0, 1.0, 0.0]);
mass.write_f32(0, &[1.0, 2.0, 1.0]);
let dt = 0.1_f32;
disp.dispatch_velocity_update(&mut pos, &mut vel, &force, &mass, dt, n);
assert!((pos.data[0] - 0.1).abs() < 1e-6);
assert!((vel.data[0] - 1.0).abs() < 1e-6);
assert!((pos.data[1] - 1.05).abs() < 1e-6);
assert!((vel.data[1] - 0.55).abs() < 1e-6);
}
// With a zero force buffer the velocities must come out unchanged.
#[test]
fn velocity_update_zero_force() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, WorkgroupSize::default());
let n = 2;
let mut pos = ComputeBuffer::new(n, BufferUsage::Storage, "pos");
let mut vel = ComputeBuffer::new(n, BufferUsage::Storage, "vel");
let force = ComputeBuffer::new(n, BufferUsage::Storage, "force");
let mut mass = ComputeBuffer::new(n, BufferUsage::Storage, "mass");
pos.write_f32(0, &[0.0, 0.0]);
vel.write_f32(0, &[2.0, -3.0]);
mass.write_f32(0, &[1.0, 1.0]);
disp.dispatch_velocity_update(&mut pos, &mut vel, &force, &mass, 0.5, n);
assert!((pos.data[0] - 1.0).abs() < 1e-6);
assert!((pos.data[1] - (-1.5)).abs() < 1e-6);
assert!((vel.data[0] - 2.0).abs() < 1e-6);
assert!((vel.data[1] - (-3.0)).abs() < 1e-6);
}
// ---- CpuComputeDispatch::dispatch_pressure_jacobi ----
// A uniform old field with zero right-hand side averages to the same value.
#[test]
fn pressure_jacobi_interior_update() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::PressureJacobi, WorkgroupSize::default());
let nx = 4;
let ny = 4;
let mut p = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "p");
let mut p_old = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "p_old");
let rhs = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "rhs");
for v in p_old.data.iter_mut() {
*v = 1.0;
}
disp.dispatch_pressure_jacobi(&mut p, &p_old, &rhs, nx, ny, 1.0);
let idx = nx + 1;
assert!((p.data[idx] - 1.0).abs() < 1e-6);
}
// Indices 0 and 4 are the two ends of the first row of the 5x5 grid.
#[test]
fn pressure_jacobi_boundary_unchanged() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::PressureJacobi, WorkgroupSize::default());
let nx = 5;
let ny = 5;
let mut p = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "p");
let p_old = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "p_old");
let rhs = ComputeBuffer::new(nx * ny, BufferUsage::Storage, "rhs");
disp.dispatch_pressure_jacobi(&mut p, &p_old, &rhs, nx, ny, 1.0);
assert_eq!(p.data[0], 0.0); assert_eq!(p.data[4], 0.0); }
// ---- CpuComputeDispatch::dispatch_particle_force ----
#[test]
fn particle_force_zero_at_large_sep() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::ParticleForce, WorkgroupSize::default());
let n = 2;
let mut pos = ComputeBuffer::new(2 * n, BufferUsage::Storage, "pos");
let mut force = ComputeBuffer::new(2 * n, BufferUsage::Storage, "force");
pos.write_f32(0, &[0.0, 0.0, 1000.0, 0.0]);
disp.dispatch_particle_force(&pos, &mut force, 1.0, 1.0, n);
assert!(force.data[0].abs() < 1e-10);
}
// The forces on the two particles must cancel component-wise.
#[test]
fn particle_force_newton3() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::ParticleForce, WorkgroupSize::default());
let n = 2;
let mut pos = ComputeBuffer::new(2 * n, BufferUsage::Storage, "pos");
let mut force = ComputeBuffer::new(2 * n, BufferUsage::Storage, "force");
pos.write_f32(0, &[0.0, 0.0, 1.5, 0.0]);
disp.dispatch_particle_force(&pos, &mut force, 1.0, 1.0, n);
assert!((force.data[0] + force.data[2]).abs() < 1e-5);
assert!((force.data[1] + force.data[3]).abs() < 1e-5);
}
// ---- GpuStats ----
#[test]
fn gpu_stats_initial_zero() {
let s = GpuStats::new();
assert_eq!(s.dispatch_count, 0);
assert_eq!(s.bytes_transferred, 0);
assert_eq!(s.kernel_time_ms, 0.0);
}
#[test]
fn gpu_stats_accumulate() {
let mut s = GpuStats::new();
s.record_dispatch(128, 0.5);
s.record_dispatch(256, 1.0);
assert_eq!(s.dispatch_count, 2);
assert_eq!(s.bytes_transferred, 384);
assert!((s.kernel_time_ms - 1.5).abs() < 1e-9);
}
// ---- jacobi_step_2d ----
#[test]
fn jacobi_step_2d_uniform_field() {
let nx = 4;
let ny = 4;
let size = nx * ny;
let mut p_new = vec![0.0_f32; size];
let p_old = vec![1.0_f32; size];
let rhs = vec![0.0_f32; size];
jacobi_step_2d(&mut p_new, &p_old, &rhs, nx, ny, 1.0);
for j in 1..ny - 1 {
for i in 1..nx - 1 {
assert!((p_new[j * nx + i] - 1.0).abs() < 1e-6);
}
}
}
// (4 + 4 + 4 + 4 - 1 * 4) / 4 = 3 at every interior point.
#[test]
fn jacobi_step_2d_rhs_effect() {
let nx = 4;
let ny = 4;
let size = nx * ny;
let mut p_new = vec![0.0_f32; size];
let p_old = vec![4.0_f32; size];
let rhs = vec![4.0_f32; size];
jacobi_step_2d(&mut p_new, &p_old, &rhs, nx, ny, 1.0);
for j in 1..ny - 1 {
for i in 1..nx - 1 {
assert!((p_new[j * nx + i] - 3.0).abs() < 1e-6);
}
}
}
// ---- pressure_poisson_solve ----
#[test]
fn pressure_poisson_zero_rhs_zero_bc() {
let nx = 5;
let ny = 5;
let mut p = vec![0.0_f32; nx * ny];
let rhs = vec![0.0_f32; nx * ny];
let residual = pressure_poisson_solve(&mut p, &rhs, nx, ny, 0.1, 50);
assert!(residual < 1e-6, "residual={residual}");
}
// 200 iterations may not end with a larger residual than 10 (1e-4 slack).
#[test]
fn pressure_poisson_residual_decreases() {
let nx = 6;
let ny = 6;
let mut p1 = vec![0.0_f32; nx * ny];
let mut p2 = p1.clone();
let rhs: Vec<f32> = (0..(nx * ny)).map(|k| (k as f32).sin()).collect();
let dx = 0.1;
let r1 = pressure_poisson_solve(&mut p1, &rhs, nx, ny, dx, 10);
let r2 = pressure_poisson_solve(&mut p2, &rhs, nx, ny, dx, 200);
assert!(
r2 <= r1 + 1e-4,
"more iterations should not increase residual (r1={r1}, r2={r2})"
);
}
// ---- PipelineCache ----
#[test]
fn pipeline_cache_insert_and_get() {
let mut cache = PipelineCache::new(4);
let disp =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, WorkgroupSize::default());
cache.insert("vel_update", disp);
assert!(cache.get("vel_update").is_some());
assert!(cache.get("nonexistent").is_none());
}
// With capacity 2, inserting a third key evicts the first one inserted.
#[test]
fn pipeline_cache_eviction() {
let mut cache = PipelineCache::new(2);
let d1 =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, WorkgroupSize::default());
let d2 =
CpuComputeDispatch::new(ComputeKernelKind::PressureJacobi, WorkgroupSize::default());
let d3 =
CpuComputeDispatch::new(ComputeKernelKind::ParticleForce, WorkgroupSize::default());
cache.insert("a", d1);
cache.insert("b", d2);
cache.insert("c", d3); assert!(cache.get("a").is_none());
assert!(cache.get("b").is_some());
assert!(cache.get("c").is_some());
}
// Inserting under an existing key replaces the stored pipeline.
#[test]
fn pipeline_cache_replace() {
let mut cache = PipelineCache::new(4);
let d1 =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, WorkgroupSize::default());
let d2 = CpuComputeDispatch::new(
ComputeKernelKind::ParticleForce,
WorkgroupSize { x: 128, y: 1, z: 1 },
);
cache.insert("key", d1);
cache.insert("key", d2);
let entry = cache.get("key").unwrap();
assert_eq!(entry.kernel, ComputeKernelKind::ParticleForce);
}
// ---- PipelineStats ----
#[test]
fn pipeline_stats_default() {
let stats = PipelineStats::default();
assert_eq!(stats.total_dispatches, 0);
assert_eq!(stats.total_workgroups, 0);
assert_eq!(stats.total_invocations, 0);
assert_eq!(stats.cache_hits, 0);
assert_eq!(stats.cache_misses, 0);
}
#[test]
fn pipeline_stats_record() {
let mut stats = PipelineStats::default();
stats.record_dispatch(4, WorkgroupSize { x: 64, y: 1, z: 1 });
assert_eq!(stats.total_dispatches, 1);
assert_eq!(stats.total_workgroups, 4);
assert_eq!(stats.total_invocations, 4 * 64);
}
#[test]
fn pipeline_stats_record_3d_workgroup() {
let mut stats = PipelineStats::default();
stats.record_dispatch(2, WorkgroupSize { x: 8, y: 8, z: 4 });
assert_eq!(stats.total_dispatches, 1);
assert_eq!(stats.total_workgroups, 2);
assert_eq!(stats.total_invocations, 2 * 8 * 8 * 4);
}
// The empty-statistics case accepts either NaN or 0.0 as the ratio.
#[test]
fn pipeline_stats_cache_ratio() {
let mut stats = PipelineStats::default();
assert!(stats.cache_hit_ratio().is_nan() || stats.cache_hit_ratio() == 0.0);
stats.cache_hits = 3;
stats.cache_misses = 1;
assert!((stats.cache_hit_ratio() - 0.75).abs() < 1e-6);
}
// ---- MultiPassPipeline ----
#[test]
fn multi_pass_empty() {
let mp = MultiPassPipeline::new("empty");
assert_eq!(mp.passes.len(), 0);
assert_eq!(mp.label, "empty");
}
// Only checks that passes are stored in insertion order; nothing is executed.
#[test]
fn multi_pass_execute_add_scale() {
let mut mp = MultiPassPipeline::new("add_scale");
mp.add_pass(ComputePass {
label: "fill".into(),
kernel: ComputeKernelKind::Custom("fill".into()),
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0],
});
mp.add_pass(ComputePass {
label: "scale".into(),
kernel: ComputeKernelKind::Custom("scale".into()),
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0],
});
assert_eq!(mp.passes.len(), 2);
assert_eq!(mp.passes[0].label, "fill");
assert_eq!(mp.passes[1].label, "scale");
}
// Drives the velocity kernel by hand once per pass; two steps of dt = 0.1
// with zero force move each position by 0.2 * velocity.
#[test]
fn multi_pass_dispatch_velocity_chain() {
let mut mp = MultiPassPipeline::new("vel_chain");
mp.add_pass(ComputePass {
label: "step1".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 1, 2, 3],
});
mp.add_pass(ComputePass {
label: "step2".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 1, 2, 3],
});
let n = 2;
let mut pos = ComputeBuffer::new(n, BufferUsage::Storage, "pos");
let mut vel = ComputeBuffer::new(n, BufferUsage::Storage, "vel");
let force = ComputeBuffer::new(n, BufferUsage::Storage, "force");
let mut mass = ComputeBuffer::new(n, BufferUsage::Storage, "mass");
pos.write_f32(0, &[0.0, 0.0]);
vel.write_f32(0, &[1.0, 2.0]);
mass.write_f32(0, &[1.0, 1.0]);
let dt = 0.1_f32;
for pass in &mp.passes {
if pass.kernel == ComputeKernelKind::VelocityUpdate {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::VelocityUpdate, pass.workgroup_size);
disp.dispatch_velocity_update(&mut pos, &mut vel, &force, &mass, dt, n);
}
}
assert!((pos.data[0] - 0.2).abs() < 1e-5);
assert!((pos.data[1] - 0.4).abs() < 1e-5);
}
// ---- validate_resource_bindings / validate_pipeline ----
#[test]
fn validate_binding_valid() {
let buffers = vec![
ComputeBuffer::new(16, BufferUsage::Storage, "buf0"),
ComputeBuffer::new(16, BufferUsage::Uniform, "buf1"),
];
let pass = ComputePass {
label: "test".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 1],
};
let errors = validate_resource_bindings(&pass, &buffers);
assert!(errors.is_empty());
}
// Binding 5 does not exist when only one buffer is supplied.
#[test]
fn validate_binding_out_of_range() {
let buffers = vec![ComputeBuffer::new(16, BufferUsage::Storage, "buf0")];
let pass = ComputePass {
label: "test".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 5],
};
let errors = validate_resource_bindings(&pass, &buffers);
assert_eq!(errors.len(), 1);
assert!(errors[0].contains("out of range"));
}
#[test]
fn validate_binding_duplicate() {
let buffers = vec![ComputeBuffer::new(16, BufferUsage::Storage, "buf0")];
let pass = ComputePass {
label: "test".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 0],
};
let errors = validate_resource_bindings(&pass, &buffers);
assert_eq!(errors.len(), 1);
assert!(errors[0].contains("Duplicate"));
}
// Only the "bad" pass (binding 3 with one buffer) contributes an error.
#[test]
fn validate_pipeline_all_passes() {
let buffers = vec![ComputeBuffer::new(16, BufferUsage::Storage, "buf0")];
let mut mp = MultiPassPipeline::new("test");
mp.add_pass(ComputePass {
label: "good".into(),
kernel: ComputeKernelKind::VelocityUpdate,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0],
});
mp.add_pass(ComputePass {
label: "bad".into(),
kernel: ComputeKernelKind::PressureJacobi,
workgroup_size: WorkgroupSize::default(),
buffer_bindings: vec![0, 3],
});
let errors = validate_pipeline(&mp, &buffers);
assert_eq!(errors.len(), 1); }
// ---- Additional coverage for the types above ----
#[test]
fn compute_buffer_clone() {
let mut buf = ComputeBuffer::new(4, BufferUsage::Storage, "orig");
buf.write_f32(0, &[1.0, 2.0, 3.0, 4.0]);
let cloned = buf.clone();
assert_eq!(buf.data, cloned.data);
assert_eq!(buf.label, cloned.label);
}
#[test]
fn compute_buffer_staging_usage() {
let buf = ComputeBuffer::new(8, BufferUsage::Staging, "staging");
assert_eq!(buf.usage, BufferUsage::Staging);
assert_eq!(buf.byte_size(), 32);
}
#[test]
fn workgroup_dispatch_count_large() {
assert_eq!(WorkgroupSize::dispatch_count(1024, 256), 4);
assert_eq!(WorkgroupSize::dispatch_count(1025, 256), 5);
}
#[test]
fn gpu_stats_clone() {
let mut s = GpuStats::new();
s.record_dispatch(100, 1.5);
let s2 = s.clone();
assert_eq!(s.dispatch_count, s2.dispatch_count);
assert_eq!(s.bytes_transferred, s2.bytes_transferred);
assert!((s.kernel_time_ms - s2.kernel_time_ms).abs() < 1e-12);
}
// At separation 0.9 < sigma the first particle is pushed in -x, away from
// the second one.
#[test]
fn particle_force_repulsive_at_close_range() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::ParticleForce, WorkgroupSize::default());
let n = 2;
let mut pos = ComputeBuffer::new(2 * n, BufferUsage::Storage, "pos");
let mut force = ComputeBuffer::new(2 * n, BufferUsage::Storage, "force");
pos.write_f32(0, &[0.0, 0.0, 0.9, 0.0]);
disp.dispatch_particle_force(&pos, &mut force, 1.0, 1.0, n);
assert!(
force.data[0] < 0.0,
"expected repulsive force, got {}",
force.data[0]
);
}
// The net force over all three particles must vanish in both components.
#[test]
fn particle_force_three_particles() {
let disp =
CpuComputeDispatch::new(ComputeKernelKind::ParticleForce, WorkgroupSize::default());
let n = 3;
let mut pos = ComputeBuffer::new(2 * n, BufferUsage::Storage, "pos");
let mut force = ComputeBuffer::new(2 * n, BufferUsage::Storage, "force");
pos.write_f32(0, &[0.0, 0.0, 2.0, 0.0, 1.0, 1.732]);
disp.dispatch_particle_force(&pos, &mut force, 1.0, 1.0, n);
let fx_total = force.data[0] + force.data[2] + force.data[4];
let fy_total = force.data[1] + force.data[3] + force.data[5];
assert!(fx_total.abs() < 1e-5, "fx_total={fx_total}");
assert!(fy_total.abs() < 1e-5, "fy_total={fy_total}");
}
#[test]
fn pressure_poisson_uniform_rhs() {
let nx = 8;
let ny = 8;
let mut p = vec![0.0_f32; nx * ny];
let rhs = vec![1.0_f32; nx * ny];
let residual = pressure_poisson_solve(&mut p, &rhs, nx, ny, 0.1, 500);
assert!(residual < 10.0, "residual={residual}");
}
// ---- sor_step_2d ----
#[test]
fn sor_step_uniform_field() {
let nx = 4;
let ny = 4;
let mut p = vec![0.0_f32; nx * ny];
let rhs = vec![0.0_f32; nx * ny];
let p_ref = vec![1.0_f32; nx * ny];
sor_step_2d(&mut p, &p_ref, &rhs, nx, ny, 1.0, 1.0);
for j in 1..ny - 1 {
for i in 1..nx - 1 {
assert!((p[j * nx + i] - 1.0).abs() < 1e-6);
}
}
}
// Boundary of ones, interior of zeros: omega = 1.5 must give a different
// value than omega = 1.0 at the interior corner point.
#[test]
fn sor_step_over_relaxation() {
let nx = 6;
let ny = 6;
let rhs = vec![0.0_f32; nx * ny];
let mut p_ref = vec![0.0_f32; nx * ny];
for i in 0..nx {
p_ref[i] = 1.0;
p_ref[(ny - 1) * nx + i] = 1.0;
}
for j in 0..ny {
p_ref[j * nx] = 1.0;
p_ref[j * nx + nx - 1] = 1.0;
}
let mut p_jac = vec![0.0_f32; nx * ny];
let mut p_sor = vec![0.0_f32; nx * ny];
sor_step_2d(&mut p_jac, &p_ref, &rhs, nx, ny, 1.0, 1.0);
sor_step_2d(&mut p_sor, &p_ref, &rhs, nx, ny, 1.0, 1.5);
let idx = nx + 1; assert!(
(p_sor[idx] - p_jac[idx]).abs() > 0.01,
"SOR and Jacobi should differ: SOR={}, Jac={}",
p_sor[idx],
p_jac[idx]
);
}
// ---- red_black_gauss_seidel_step ----
#[test]
fn red_black_gs_uniform() {
let nx = 6;
let ny = 6;
let mut p = vec![1.0_f32; nx * ny];
let rhs = vec![0.0_f32; nx * ny];
red_black_gauss_seidel_step(&mut p, &rhs, nx, ny, 1.0);
for j in 1..ny - 1 {
for i in 1..nx - 1 {
assert!((p[j * nx + i] - 1.0).abs() < 1e-6);
}
}
}
// With the whole boundary held at 1 and zero right-hand side, the centre
// value should approach 1 after 200 sweeps.
#[test]
fn red_black_gs_converges() {
let nx = 8;
let ny = 8;
let mut p = vec![0.0_f32; nx * ny];
let rhs = vec![0.0_f32; nx * ny];
for i in 0..nx {
p[i] = 1.0;
p[(ny - 1) * nx + i] = 1.0;
}
for j in 0..ny {
p[j * nx] = 1.0;
p[j * nx + nx - 1] = 1.0;
}
for _ in 0..200 {
red_black_gauss_seidel_step(&mut p, &rhs, nx, ny, 1.0);
}
let center = p[(ny / 2) * nx + nx / 2];
assert!((center - 1.0).abs() < 0.01, "center={center}");
}
// ---- compute_linf_residual ----
#[test]
fn linf_residual_zero_for_exact() {
let nx = 4;
let ny = 4;
let p = vec![1.0_f32; nx * ny];
let rhs = vec![0.0_f32; nx * ny];
let res = compute_linf_residual(&p, &rhs, nx, ny, 1.0);
assert!(res < 1e-6, "res={res}");
}
// A single spike at an interior point must produce a large residual.
#[test]
fn linf_residual_nonzero_for_wrong() {
let nx = 4;
let ny = 4;
let mut p = vec![0.0_f32; nx * ny];
p[nx + 1] = 100.0; let rhs = vec![0.0_f32; nx * ny];
let res = compute_linf_residual(&p, &rhs, nx, ny, 1.0);
assert!(res > 1.0, "expected large residual, got {res}");
}
// ---- dispatch_neighbor_search ----
// Points 0, 1 and 3 lie within the cutoff of each other; point 2 is far away.
#[test]
fn dispatch_neighbor_search_basic() {
let n = 4;
let positions = vec![
0.0_f32, 0.0, 0.5, 0.0, 5.0, 5.0, 0.3, 0.3, ];
let neighbors = dispatch_neighbor_search(&positions, n, 1.0);
assert!(neighbors[0].contains(&1));
assert!(neighbors[0].contains(&3));
assert!(neighbors[2].is_empty());
}
#[test]
fn dispatch_neighbor_search_all_close() {
let n = 3;
let positions = vec![0.0_f32, 0.0, 0.1, 0.0, 0.0, 0.1];
let neighbors = dispatch_neighbor_search(&positions, n, 1.0);
assert_eq!(neighbors[0].len(), 2);
assert_eq!(neighbors[1].len(), 2);
assert_eq!(neighbors[2].len(), 2);
}
#[test]
fn dispatch_neighbor_search_none() {
let n = 2;
let positions = vec![0.0_f32, 0.0, 100.0, 100.0];
let neighbors = dispatch_neighbor_search(&positions, n, 1.0);
assert!(neighbors[0].is_empty());
assert!(neighbors[1].is_empty());
}
}