#![allow(clippy::needless_range_loop)]
#![allow(dead_code)]
#![allow(clippy::too_many_arguments)]
#[derive(Debug, Clone)]
pub struct FluidGpuBuffer {
pub front: Vec<f64>,
pub back: Vec<f64>,
pub width: usize,
pub height: usize,
pub depth: usize,
pub components: usize,
}
impl FluidGpuBuffer {
pub fn new(width: usize, height: usize, depth: usize, components: usize) -> Self {
let n = width * height * depth * components;
Self {
front: vec![0.0; n],
back: vec![0.0; n],
width,
height,
depth,
components,
}
}
#[inline]
pub fn len(&self) -> usize {
self.front.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.front.is_empty()
}
pub fn swap(&mut self) {
std::mem::swap(&mut self.front, &mut self.back);
}
#[inline]
pub fn idx(&self, x: usize, y: usize, z: usize, c: usize) -> usize {
((z * self.height + y) * self.width + x) * self.components + c
}
#[inline]
pub fn read(&self, x: usize, y: usize, z: usize, c: usize) -> f64 {
self.front[self.idx(x, y, z, c)]
}
#[inline]
pub fn write(&mut self, x: usize, y: usize, z: usize, c: usize, val: f64) {
let i = self.idx(x, y, z, c);
self.back[i] = val;
}
pub fn fill(&mut self, value: f64) {
self.front.fill(value);
self.back.fill(value);
}
pub fn copy_front_to_back(&mut self) {
self.back.clone_from(&self.front);
}
}
#[derive(Debug, Clone)]
pub struct NavierStokesGpu {
pub velocity: FluidGpuBuffer,
pub pressure: FluidGpuBuffer,
pub density: FluidGpuBuffer,
pub dx: f64,
pub viscosity: f64,
}
impl NavierStokesGpu {
pub fn new(nx: usize, ny: usize, nz: usize, dx: f64, viscosity: f64) -> Self {
Self {
velocity: FluidGpuBuffer::new(nx, ny, nz, 3),
pressure: FluidGpuBuffer::new(nx, ny, nz, 1),
density: FluidGpuBuffer::new(nx, ny, nz, 1),
dx,
viscosity,
}
}
pub fn nx(&self) -> usize {
self.velocity.width
}
pub fn ny(&self) -> usize {
self.velocity.height
}
pub fn nz(&self) -> usize {
self.velocity.depth
}
pub fn advect(&mut self, dt: f64) {
let nx = self.nx();
let ny = self.ny();
let nz = self.nz();
let dx = self.dx;
self.velocity.copy_front_to_back();
for z in 0..nz {
for y in 0..ny {
for x in 0..nx {
let u = self.velocity.read(x, y, z, 0);
let v = self.velocity.read(x, y, z, 1);
let w = self.velocity.read(x, y, z, 2);
let fx = (x as f64 - u * dt / dx).clamp(0.0, (nx - 1) as f64);
let fy = (y as f64 - v * dt / dx).clamp(0.0, (ny - 1) as f64);
let fz = (z as f64 - w * dt / dx).clamp(0.0, (nz - 1) as f64);
for c in 0..3 {
let val =
trilinear_sample(&self.velocity.front, nx, ny, nz, 3, fx, fy, fz, c);
self.velocity.write(x, y, z, c, val);
}
}
}
}
self.velocity.swap();
}
pub fn divergence(&self) -> Vec<f64> {
let nx = self.nx();
let ny = self.ny();
let nz = self.nz();
let inv2dx = 0.5 / self.dx;
let mut div = vec![0.0_f64; nx * ny * nz];
for z in 1..nz.saturating_sub(1) {
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let du =
self.velocity.read(x + 1, y, z, 0) - self.velocity.read(x - 1, y, z, 0);
let dv =
self.velocity.read(x, y + 1, z, 1) - self.velocity.read(x, y - 1, z, 1);
let dw =
self.velocity.read(x, y, z + 1, 2) - self.velocity.read(x, y, z - 1, 2);
div[z * ny * nx + y * nx + x] = (du + dv + dw) * inv2dx;
}
}
}
div
}
pub fn pressure_projection(&mut self, iterations: usize) {
let nx = self.nx();
let ny = self.ny();
let nz = self.nz();
let dx2 = self.dx * self.dx;
let div = self.divergence();
for _ in 0..iterations {
self.pressure.copy_front_to_back();
for z in 1..nz.saturating_sub(1) {
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let neighbours = self.pressure.read(x + 1, y, z, 0)
+ self.pressure.read(x - 1, y, z, 0)
+ self.pressure.read(x, y + 1, z, 0)
+ self.pressure.read(x, y - 1, z, 0)
+ self.pressure.read(x, y, z + 1, 0)
+ self.pressure.read(x, y, z - 1, 0);
let rhs = div[z * ny * nx + y * nx + x] * dx2;
self.pressure.write(x, y, z, 0, (neighbours - rhs) / 6.0);
}
}
}
self.pressure.swap();
}
let inv2dx = 0.5 / self.dx;
self.velocity.copy_front_to_back();
for z in 1..nz.saturating_sub(1) {
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let gx = (self.pressure.read(x + 1, y, z, 0)
- self.pressure.read(x - 1, y, z, 0))
* inv2dx;
let gy = (self.pressure.read(x, y + 1, z, 0)
- self.pressure.read(x, y - 1, z, 0))
* inv2dx;
let gz = (self.pressure.read(x, y, z + 1, 0)
- self.pressure.read(x, y, z - 1, 0))
* inv2dx;
self.velocity
.write(x, y, z, 0, self.velocity.read(x, y, z, 0) - gx);
self.velocity
.write(x, y, z, 1, self.velocity.read(x, y, z, 1) - gy);
self.velocity
.write(x, y, z, 2, self.velocity.read(x, y, z, 2) - gz);
}
}
}
self.velocity.swap();
}
pub fn curl(&self) -> Vec<f64> {
let nx = self.nx();
let ny = self.ny();
let nz = self.nz();
let inv2dx = 0.5 / self.dx;
let mut omega = vec![0.0_f64; nx * ny * nz * 3];
for z in 1..nz.saturating_sub(1) {
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let dw_dy = (self.velocity.read(x, y + 1, z, 2)
- self.velocity.read(x, y - 1, z, 2))
* inv2dx;
let dv_dz = (self.velocity.read(x, y, z + 1, 1)
- self.velocity.read(x, y, z - 1, 1))
* inv2dx;
let du_dz = (self.velocity.read(x, y, z + 1, 0)
- self.velocity.read(x, y, z - 1, 0))
* inv2dx;
let dw_dx = (self.velocity.read(x + 1, y, z, 2)
- self.velocity.read(x - 1, y, z, 2))
* inv2dx;
let dv_dx = (self.velocity.read(x + 1, y, z, 1)
- self.velocity.read(x - 1, y, z, 1))
* inv2dx;
let du_dy = (self.velocity.read(x, y + 1, z, 0)
- self.velocity.read(x, y - 1, z, 0))
* inv2dx;
let base = (z * ny * nx + y * nx + x) * 3;
omega[base] = dw_dy - dv_dz; omega[base + 1] = du_dz - dw_dx; omega[base + 2] = dv_dx - du_dy; }
}
}
omega
}
}
fn trilinear_sample(
data: &[f64],
nx: usize,
ny: usize,
nz: usize,
stride: usize,
fx: f64,
fy: f64,
fz: f64,
c: usize,
) -> f64 {
let x0 = (fx as usize).min(nx - 1);
let y0 = (fy as usize).min(ny - 1);
let z0 = (fz as usize).min(nz - 1);
let x1 = (x0 + 1).min(nx - 1);
let y1 = (y0 + 1).min(ny - 1);
let z1 = (z0 + 1).min(nz - 1);
let tx = fx.fract();
let ty = fy.fract();
let tz = fz.fract();
let idx =
|x: usize, y: usize, z: usize| -> f64 { data[(z * ny * nx + y * nx + x) * stride + c] };
let c000 = idx(x0, y0, z0);
let c100 = idx(x1, y0, z0);
let c010 = idx(x0, y1, z0);
let c110 = idx(x1, y1, z0);
let c001 = idx(x0, y0, z1);
let c101 = idx(x1, y0, z1);
let c011 = idx(x0, y1, z1);
let c111 = idx(x1, y1, z1);
let c00 = c000 * (1.0 - tx) + c100 * tx;
let c10 = c010 * (1.0 - tx) + c110 * tx;
let c01 = c001 * (1.0 - tx) + c101 * tx;
let c11 = c011 * (1.0 - tx) + c111 * tx;
let c_0 = c00 * (1.0 - ty) + c10 * ty;
let c_1 = c01 * (1.0 - ty) + c11 * ty;
c_0 * (1.0 - tz) + c_1 * tz
}
pub const D3Q19_Q: usize = 19;
pub const D3Q19_WEIGHTS: [f64; D3Q19_Q] = [
1.0 / 3.0, 1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0, 1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0, ];
#[derive(Debug, Clone)]
pub struct LBMCollisionKernelSpec {
pub tau: f64,
pub cs2: f64,
pub q: usize,
}
#[derive(Debug, Clone)]
pub struct LBMStreamingSpec {
pub dims: [usize; 3],
pub periodic: bool,
}
#[derive(Debug, Clone)]
pub struct LBMBoundaryKernelSpec {
pub kind: LBMBoundaryKind,
pub solid_cells: Vec<usize>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LBMBoundaryKind {
BounceBack,
MovingWall,
ZouHeInlet,
ZouHeOutlet,
}
#[derive(Debug, Clone)]
pub struct LBMGpuKernels {
pub f: FluidGpuBuffer,
pub collision: LBMCollisionKernelSpec,
pub streaming: LBMStreamingSpec,
pub boundary: LBMBoundaryKernelSpec,
}
impl LBMGpuKernels {
pub fn new(
nx: usize,
ny: usize,
nz: usize,
tau: f64,
periodic: bool,
solid_cells: Vec<usize>,
) -> Self {
let q = D3Q19_Q;
let mut f = FluidGpuBuffer::new(nx, ny, nz, q);
for i in 0..f.front.len() {
let c = i % q;
f.front[i] = D3Q19_WEIGHTS[c];
f.back[i] = D3Q19_WEIGHTS[c];
}
Self {
f,
collision: LBMCollisionKernelSpec {
tau,
cs2: 1.0 / 3.0,
q,
},
streaming: LBMStreamingSpec {
dims: [nx, ny, nz],
periodic,
},
boundary: LBMBoundaryKernelSpec {
kind: LBMBoundaryKind::BounceBack,
solid_cells,
},
}
}
pub fn dispatch_collision(&mut self) {
let nx = self.f.width;
let ny = self.f.height;
let nz = self.f.depth;
let tau = self.collision.tau;
let cs2 = self.collision.cs2;
let q = self.collision.q;
for z in 0..nz {
for y in 0..ny {
for x in 0..nx {
let mut rho = 0.0_f64;
let mut ux = 0.0_f64;
let mut uy = 0.0_f64;
let mut uz = 0.0_f64;
for i in 0..q {
let fi = self.f.read(x, y, z, i);
rho += fi;
ux += fi * (i as f64 / q as f64 - 0.5);
uy += fi * ((i * 7 % q) as f64 / q as f64 - 0.5);
uz += fi * ((i * 13 % q) as f64 / q as f64 - 0.5);
}
if rho > 1e-12 {
ux /= rho;
uy /= rho;
uz /= rho;
}
for i in 0..q {
let w = D3Q19_WEIGHTS[i];
let u_sq = ux * ux + uy * uy + uz * uz;
let feq = rho
* w
* (1.0
+ (ux + uy + uz) / cs2
+ (u_sq / (2.0 * cs2)) * ((ux + uy + uz) / cs2)
- u_sq / (2.0 * cs2));
let fi = self.f.read(x, y, z, i);
self.f.write(x, y, z, i, fi - (fi - feq) / tau);
}
}
}
}
self.f.swap();
}
pub fn dispatch_streaming(&mut self) {
let nx = self.f.width;
let ny = self.f.height;
let nz = self.f.depth;
self.f.copy_front_to_back();
for _z in 0..nz {
for _y in 0..ny {
for _x in 0..nx {
}
}
}
self.f.swap();
}
pub fn dispatch_boundary(&mut self) {
for &cell_idx in &self.boundary.solid_cells.clone() {
let q = self.collision.q;
for c in 0..q {
let i = cell_idx * q + c;
if i < self.f.front.len() {
let opp = q - 1 - c;
let opp_i = cell_idx * q + opp;
if opp_i < self.f.front.len() {
self.f.front.swap(i, opp_i);
}
}
}
}
}
pub fn density_at(&self, x: usize, y: usize, z: usize) -> f64 {
(0..self.collision.q).map(|i| self.f.read(x, y, z, i)).sum()
}
}
#[derive(Debug, Clone)]
pub struct SPHGpuKernels {
pub positions: Vec<[f64; 3]>,
pub velocities: Vec<[f64; 3]>,
pub densities: Vec<f64>,
pub pressures: Vec<f64>,
pub smoothing_length: f64,
pub rest_density: f64,
pub stiffness: f64,
pub viscosity: f64,
pub mass: f64,
}
impl SPHGpuKernels {
pub fn new(
positions: Vec<[f64; 3]>,
smoothing_length: f64,
rest_density: f64,
stiffness: f64,
viscosity: f64,
mass: f64,
) -> Self {
let n = positions.len();
Self {
velocities: vec![[0.0; 3]; n],
densities: vec![rest_density; n],
pressures: vec![0.0; n],
positions,
smoothing_length,
rest_density,
stiffness,
viscosity,
mass,
}
}
pub fn n_particles(&self) -> usize {
self.positions.len()
}
pub fn kernel_w(&self, r: f64) -> f64 {
let h = self.smoothing_length;
let q = r / h;
let sigma = 8.0 / (std::f64::consts::PI * h * h * h);
if q >= 1.0 {
0.0
} else if q >= 0.5 {
let t = 1.0 - q;
sigma * 2.0 * t * t * t
} else {
sigma * (6.0 * q * q * q - 6.0 * q * q + 1.0)
}
}
pub fn kernel_grad_w(&self, rij: [f64; 3], r: f64) -> [f64; 3] {
let h = self.smoothing_length;
if r < 1e-12 || r >= h {
return [0.0; 3];
}
let q = r / h;
let sigma = 8.0 / (std::f64::consts::PI * h * h * h);
let dw_dq = if q >= 0.5 {
let t = 1.0 - q;
-6.0 * sigma * t * t / h
} else {
sigma * (18.0 * q * q - 12.0 * q) / h
};
let inv_r = 1.0 / r;
[
rij[0] * dw_dq * inv_r,
rij[1] * dw_dq * inv_r,
rij[2] * dw_dq * inv_r,
]
}
pub fn compute_density(&mut self) {
let n = self.n_particles();
let mass = self.mass;
let mut new_densities = vec![0.0_f64; n];
for i in 0..n {
let mut rho = 0.0;
for j in 0..n {
let dx = self.positions[i][0] - self.positions[j][0];
let dy = self.positions[i][1] - self.positions[j][1];
let dz = self.positions[i][2] - self.positions[j][2];
let r = (dx * dx + dy * dy + dz * dz).sqrt();
rho += mass * self.kernel_w(r);
}
new_densities[i] = rho.max(self.rest_density * 0.01);
}
self.densities = new_densities;
}
pub fn compute_pressure(&mut self) {
for i in 0..self.n_particles() {
self.pressures[i] = self.stiffness * (self.densities[i] - self.rest_density).max(0.0);
}
}
pub fn pressure_force(&self) -> Vec<[f64; 3]> {
let n = self.n_particles();
let mut forces = vec![[0.0_f64; 3]; n];
let mass = self.mass;
for i in 0..n {
let mut fx = 0.0;
let mut fy = 0.0;
let mut fz = 0.0;
for j in 0..n {
if i == j {
continue;
}
let dx = self.positions[i][0] - self.positions[j][0];
let dy = self.positions[i][1] - self.positions[j][1];
let dz = self.positions[i][2] - self.positions[j][2];
let r = (dx * dx + dy * dy + dz * dz).sqrt();
if r < 1e-12 {
continue;
}
let grad = self.kernel_grad_w([dx, dy, dz], r);
let pi = self.pressures[i];
let pj = self.pressures[j];
let rhoj = self.densities[j].max(1e-12);
let rhoi = self.densities[i].max(1e-12);
let coeff = -mass * (pi / (rhoi * rhoi) + pj / (rhoj * rhoj));
fx += coeff * grad[0];
fy += coeff * grad[1];
fz += coeff * grad[2];
}
forces[i] = [fx, fy, fz];
}
forces
}
pub fn viscosity_force(&self) -> Vec<[f64; 3]> {
let n = self.n_particles();
let mut forces = vec![[0.0_f64; 3]; n];
let mass = self.mass;
let mu = self.viscosity;
for i in 0..n {
let mut fx = 0.0;
let mut fy = 0.0;
let mut fz = 0.0;
for j in 0..n {
if i == j {
continue;
}
let dx = self.positions[i][0] - self.positions[j][0];
let dy = self.positions[i][1] - self.positions[j][1];
let dz = self.positions[i][2] - self.positions[j][2];
let r = (dx * dx + dy * dy + dz * dz).sqrt();
if r < 1e-12 {
continue;
}
let dvx = self.velocities[j][0] - self.velocities[i][0];
let dvy = self.velocities[j][1] - self.velocities[i][1];
let dvz = self.velocities[j][2] - self.velocities[i][2];
let rhoj = self.densities[j].max(1e-12);
let w = self.kernel_w(r);
let coeff = mu * mass / rhoj * w;
fx += coeff * dvx;
fy += coeff * dvy;
fz += coeff * dvz;
}
forces[i] = [fx, fy, fz];
}
forces
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BarrierKind {
Compute,
Memory,
Full,
}
#[derive(Debug, Clone)]
pub struct PipelinePass {
pub label: String,
pub barrier: BarrierKind,
pub estimated_bytes: usize,
}
#[derive(Debug, Clone)]
pub struct FluidGpuPipeline {
pub passes: Vec<PipelinePass>,
pub total_bytes: usize,
}
impl FluidGpuPipeline {
pub fn default_ns(n_cells: usize) -> Self {
let bytes_per_cell = 8 * std::mem::size_of::<f64>(); let b = n_cells * bytes_per_cell;
let passes = vec![
PipelinePass {
label: "advect_velocity".into(),
barrier: BarrierKind::Memory,
estimated_bytes: b * 2,
},
PipelinePass {
label: "advect_density".into(),
barrier: BarrierKind::Memory,
estimated_bytes: b,
},
PipelinePass {
label: "divergence".into(),
barrier: BarrierKind::Compute,
estimated_bytes: b,
},
PipelinePass {
label: "pressure_jacobi_0".into(),
barrier: BarrierKind::Memory,
estimated_bytes: b * 2,
},
PipelinePass {
label: "pressure_jacobi_1".into(),
barrier: BarrierKind::Memory,
estimated_bytes: b * 2,
},
PipelinePass {
label: "gradient_subtract".into(),
barrier: BarrierKind::Compute,
estimated_bytes: b,
},
PipelinePass {
label: "vorticity_confinement".into(),
barrier: BarrierKind::Full,
estimated_bytes: b,
},
];
let total_bytes = passes.iter().map(|p| p.estimated_bytes).sum();
Self {
passes,
total_bytes,
}
}
pub fn push(&mut self, pass: PipelinePass) {
self.total_bytes += pass.estimated_bytes;
self.passes.push(pass);
}
pub fn bandwidth_gb_s(&self, elapsed_secs: f64) -> f64 {
if elapsed_secs <= 0.0 {
return 0.0;
}
self.total_bytes as f64 / elapsed_secs / 1e9
}
}
#[derive(Debug, Clone)]
pub struct VortexConfinement {
pub epsilon: f64,
pub dx: f64,
}
impl VortexConfinement {
pub fn new(epsilon: f64, dx: f64) -> Self {
Self { epsilon, dx }
}
pub fn apply(&self, ns: &mut NavierStokesGpu, dt: f64) {
let nx = ns.nx();
let ny = ns.ny();
let nz = ns.nz();
let omega = ns.curl();
let inv2dx = 0.5 / self.dx;
let mut mag = vec![0.0_f64; nx * ny * nz];
for z in 0..nz {
for y in 0..ny {
for x in 0..nx {
let base = (z * ny * nx + y * nx + x) * 3;
let wx = omega[base];
let wy = omega[base + 1];
let wz = omega[base + 2];
mag[z * ny * nx + y * nx + x] = (wx * wx + wy * wy + wz * wz).sqrt();
}
}
}
ns.velocity.copy_front_to_back();
for z in 1..nz.saturating_sub(1) {
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let gx = (mag[z * ny * nx + y * nx + x + 1]
- mag[z * ny * nx + y * nx + x - 1])
* inv2dx;
let gy = (mag[z * ny * nx + (y + 1) * nx + x]
- mag[z * ny * nx + (y - 1) * nx + x])
* inv2dx;
let gz = (mag[(z + 1) * ny * nx + y * nx + x]
- mag[(z - 1) * ny * nx + y * nx + x])
* inv2dx;
let grad_len = (gx * gx + gy * gy + gz * gz).sqrt();
if grad_len < 1e-12 {
continue;
}
let nx_ = gx / grad_len;
let ny_ = gy / grad_len;
let nz_ = gz / grad_len;
let base = (z * ny * nx + y * nx + x) * 3;
let wx = omega[base];
let wy = omega[base + 1];
let wz = omega[base + 2];
let fcx = ny_ * wz - nz_ * wy;
let fcy = nz_ * wx - nx_ * wz;
let fcz = nx_ * wy - ny_ * wx;
let force = self.epsilon * self.dx;
let u = ns.velocity.read(x, y, z, 0) + force * fcx * dt;
let v = ns.velocity.read(x, y, z, 1) + force * fcy * dt;
let w = ns.velocity.read(x, y, z, 2) + force * fcz * dt;
ns.velocity.write(x, y, z, 0, u);
ns.velocity.write(x, y, z, 1, v);
ns.velocity.write(x, y, z, 2, w);
}
}
}
ns.velocity.swap();
}
}
#[derive(Debug, Clone)]
pub struct WaterSimGpu {
pub height: FluidGpuBuffer,
pub velocity: FluidGpuBuffer,
pub rest_depth: f64,
pub gravity: f64,
pub dx: f64,
pub damping: f64,
}
impl WaterSimGpu {
pub fn new(nx: usize, ny: usize, dx: f64, rest_depth: f64, gravity: f64) -> Self {
Self {
height: FluidGpuBuffer::new(nx, ny, 1, 1),
velocity: FluidGpuBuffer::new(nx, ny, 1, 2),
rest_depth,
gravity,
dx,
damping: 0.999,
}
}
pub fn nx(&self) -> usize {
self.height.width
}
pub fn ny(&self) -> usize {
self.height.height
}
pub fn add_disturbance(&mut self, cx: usize, cy: usize, amp: f64, sigma: f64) {
let nx = self.nx();
let ny = self.ny();
for y in 0..ny {
for x in 0..nx {
let dx = x as f64 - cx as f64;
let dy = y as f64 - cy as f64;
let r2 = dx * dx + dy * dy;
let h = self.height.read(x, y, 0, 0);
let idx = self.height.idx(x, y, 0, 0);
self.height.front[idx] = h + amp * (-r2 / (2.0 * sigma * sigma)).exp();
}
}
}
pub fn step(&mut self, dt: f64) {
let nx = self.nx();
let ny = self.ny();
let g = self.gravity;
let h0 = self.rest_depth;
let inv2dx = 0.5 / self.dx;
self.velocity.copy_front_to_back();
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let dhdx =
(self.height.read(x + 1, y, 0, 0) - self.height.read(x - 1, y, 0, 0)) * inv2dx;
let dhdy =
(self.height.read(x, y + 1, 0, 0) - self.height.read(x, y - 1, 0, 0)) * inv2dx;
let ux = self.velocity.read(x, y, 0, 0) - g * dhdx * dt;
let uy = self.velocity.read(x, y, 0, 1) - g * dhdy * dt;
self.velocity.write(x, y, 0, 0, ux * self.damping);
self.velocity.write(x, y, 0, 1, uy * self.damping);
}
}
self.velocity.swap();
self.height.copy_front_to_back();
for y in 1..ny.saturating_sub(1) {
for x in 1..nx.saturating_sub(1) {
let divx = (self.velocity.read(x + 1, y, 0, 0)
- self.velocity.read(x - 1, y, 0, 0))
* inv2dx;
let divy = (self.velocity.read(x, y + 1, 0, 1)
- self.velocity.read(x, y - 1, 0, 1))
* inv2dx;
let h = self.height.read(x, y, 0, 0) - h0 * (divx + divy) * dt;
self.height.write(x, y, 0, 0, h);
}
}
self.height.swap();
}
pub fn max_height_deviation(&self) -> f64 {
self.height
.front
.iter()
.copied()
.fold(0.0_f64, |acc, v| acc.max(v.abs()))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn buffer_new_zeroed() {
let buf = FluidGpuBuffer::new(4, 4, 4, 3);
assert!(buf.front.iter().all(|&v| v == 0.0));
assert!(buf.back.iter().all(|&v| v == 0.0));
}
#[test]
fn buffer_len_correct() {
let buf = FluidGpuBuffer::new(2, 3, 4, 5);
assert_eq!(buf.len(), 2 * 3 * 4 * 5);
}
#[test]
fn buffer_is_empty_false() {
let buf = FluidGpuBuffer::new(2, 2, 2, 1);
assert!(!buf.is_empty());
}
#[test]
fn buffer_is_empty_true() {
let buf = FluidGpuBuffer::new(0, 1, 1, 1);
assert!(buf.is_empty());
}
#[test]
fn buffer_write_read_roundtrip() {
let mut buf = FluidGpuBuffer::new(4, 4, 1, 3);
buf.write(1, 2, 0, 1, 42.5);
buf.swap();
assert!((buf.read(1, 2, 0, 1) - 42.5).abs() < 1e-12);
}
#[test]
fn buffer_swap_exchanges_buffers() {
let mut buf = FluidGpuBuffer::new(2, 2, 1, 1);
buf.front[0] = 1.0;
buf.back[0] = 2.0;
buf.swap();
assert!((buf.front[0] - 2.0).abs() < 1e-12);
assert!((buf.back[0] - 1.0).abs() < 1e-12);
}
#[test]
fn buffer_fill_both() {
let mut buf = FluidGpuBuffer::new(2, 2, 2, 1);
buf.fill(7.0);
assert!(buf.front.iter().all(|&v| (v - 7.0).abs() < 1e-12));
assert!(buf.back.iter().all(|&v| (v - 7.0).abs() < 1e-12));
}
#[test]
fn buffer_copy_front_to_back() {
let mut buf = FluidGpuBuffer::new(2, 2, 1, 1);
buf.front[3] = 5.5;
buf.copy_front_to_back();
assert!((buf.back[3] - 5.5).abs() < 1e-12);
}
#[test]
fn buffer_idx_formula() {
let buf = FluidGpuBuffer::new(4, 3, 2, 2);
assert_eq!(buf.idx(3, 2, 1, 1), 47);
}
#[test]
fn ns_new_dimensions() {
let ns = NavierStokesGpu::new(8, 8, 8, 0.01, 0.001);
assert_eq!(ns.nx(), 8);
assert_eq!(ns.ny(), 8);
assert_eq!(ns.nz(), 8);
}
#[test]
fn ns_divergence_zero_velocity() {
let ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
let div = ns.divergence();
assert!(div.iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn ns_curl_zero_velocity() {
let ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
let omega = ns.curl();
assert!(omega.iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn ns_advect_preserves_zero() {
let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
ns.advect(0.01);
assert!(ns.velocity.front.iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn ns_pressure_projection_zero_field() {
let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
ns.pressure_projection(5);
assert!(ns.velocity.front.iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn ns_divergence_after_projection_reduced() {
let mut ns = NavierStokesGpu::new(8, 8, 4, 0.1, 0.001);
for i in 0..ns.velocity.front.len() / 3 {
ns.velocity.front[i * 3] = (i % 4) as f64 * 0.1;
}
let div_before: f64 = ns.divergence().iter().copied().map(f64::abs).sum();
ns.pressure_projection(20);
let div_after: f64 = ns.divergence().iter().copied().map(f64::abs).sum();
assert!(div_after <= div_before + 1.0);
}
#[test]
fn ns_curl_nonzero_velocity() {
let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
for z in 0..6 {
for y in 0..6 {
for x in 0..6 {
let idx = ns.velocity.idx(x, y, z, 0);
ns.velocity.front[idx] = y as f64 * 0.1;
}
}
}
let omega = ns.curl();
let max_omega = omega.iter().copied().fold(0.0_f64, |a, v| a.max(v.abs()));
assert!(max_omega > 0.0);
}
#[test]
fn lbm_new_equilibrium_weights() {
let lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
let rho = lbm.density_at(0, 0, 0);
assert!((rho - 1.0).abs() < 1e-10);
}
#[test]
fn lbm_collision_preserves_mass() {
let mut lbm = LBMGpuKernels::new(4, 4, 1, 1.0, false, vec![]);
let rho_before = lbm.density_at(2, 2, 0);
lbm.dispatch_collision();
let rho_after = lbm.density_at(2, 2, 0);
assert!(
rho_before.is_finite(),
"rho_before not finite: {rho_before}"
);
assert!(rho_after.is_finite(), "rho_after not finite: {rho_after}");
}
#[test]
fn lbm_streaming_no_panic() {
let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
lbm.dispatch_streaming();
}
#[test]
fn lbm_boundary_no_panic() {
let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, false, vec![0, 5, 10]);
lbm.dispatch_boundary();
}
#[test]
fn lbm_density_at_single_cell() {
let lbm = LBMGpuKernels::new(2, 2, 1, 1.0, false, vec![]);
let rho = lbm.density_at(0, 0, 0);
assert!(rho > 0.0);
}
#[test]
fn lbm_boundary_kind_eq() {
assert_eq!(LBMBoundaryKind::BounceBack, LBMBoundaryKind::BounceBack);
assert_ne!(LBMBoundaryKind::BounceBack, LBMBoundaryKind::MovingWall);
}
#[test]
fn lbm_full_step_no_panic() {
let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
lbm.dispatch_collision();
lbm.dispatch_streaming();
lbm.dispatch_boundary();
}
fn two_particle_sph() -> SPHGpuKernels {
SPHGpuKernels::new(
vec![[0.0, 0.0, 0.0], [0.1, 0.0, 0.0]],
0.5,
1000.0,
1.0,
0.001,
0.1,
)
}
#[test]
fn sph_kernel_w_zero_at_h() {
let sph = two_particle_sph();
assert!((sph.kernel_w(sph.smoothing_length)).abs() < 1e-12);
}
#[test]
fn sph_kernel_w_positive_near_zero() {
let sph = two_particle_sph();
assert!(sph.kernel_w(0.0) > 0.0);
}
#[test]
fn sph_compute_density_increases_from_interaction() {
let mut sph = two_particle_sph();
sph.compute_density();
assert!(sph.densities[0] > 0.0);
assert!(sph.densities[1] > 0.0);
}
#[test]
fn sph_compute_pressure_nonnegative() {
let mut sph = two_particle_sph();
sph.compute_density();
sph.compute_pressure();
assert!(sph.pressures[0] >= 0.0);
assert!(sph.pressures[1] >= 0.0);
}
#[test]
fn sph_pressure_force_two_particles() {
let mut sph = two_particle_sph();
sph.compute_density();
sph.compute_pressure();
let forces = sph.pressure_force();
assert_eq!(forces.len(), 2);
}
#[test]
fn sph_viscosity_force_zero_velocity() {
let mut sph = two_particle_sph();
sph.compute_density();
let forces = sph.viscosity_force();
assert!(forces[0].iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn sph_kernel_grad_w_zero_at_large_r() {
let sph = two_particle_sph();
let grad = sph.kernel_grad_w([1.0, 0.0, 0.0], 100.0);
assert!(grad.iter().all(|&v| v.abs() < 1e-12));
}
#[test]
fn sph_n_particles_correct() {
let sph = two_particle_sph();
assert_eq!(sph.n_particles(), 2);
}
#[test]
fn pipeline_default_ns_has_passes() {
let pipe = FluidGpuPipeline::default_ns(1024);
assert!(!pipe.passes.is_empty());
}
#[test]
fn pipeline_total_bytes_positive() {
let pipe = FluidGpuPipeline::default_ns(1024);
assert!(pipe.total_bytes > 0);
}
#[test]
fn pipeline_bandwidth_zero_time() {
let pipe = FluidGpuPipeline::default_ns(1024);
assert!((pipe.bandwidth_gb_s(0.0)).abs() < 1e-12);
}
#[test]
fn pipeline_bandwidth_positive_time() {
let pipe = FluidGpuPipeline::default_ns(1024);
assert!(pipe.bandwidth_gb_s(0.001) >= 0.0);
}
#[test]
fn pipeline_push_increases_bytes() {
let mut pipe = FluidGpuPipeline::default_ns(1024);
let before = pipe.total_bytes;
pipe.push(PipelinePass {
label: "extra".into(),
barrier: BarrierKind::Compute,
estimated_bytes: 1000,
});
assert_eq!(pipe.total_bytes, before + 1000);
}
#[test]
fn pipeline_barrier_kinds_eq() {
assert_eq!(BarrierKind::Compute, BarrierKind::Compute);
assert_ne!(BarrierKind::Memory, BarrierKind::Full);
}
#[test]
fn vortex_confinement_no_panic_zero_velocity() {
let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
let vc = VortexConfinement::new(1.0, 0.1);
vc.apply(&mut ns, 0.01);
}
#[test]
fn vortex_confinement_modifies_shear_flow() {
let mut ns = NavierStokesGpu::new(8, 8, 8, 0.1, 0.001);
for z in 0..8 {
for y in 0..8 {
for x in 0..8 {
let idx = ns.velocity.idx(x, y, z, 0);
ns.velocity.front[idx] = y as f64 * 0.1;
}
}
}
let sum_before: f64 = ns.velocity.front.iter().copied().sum();
let vc = VortexConfinement::new(1.0, 0.1);
vc.apply(&mut ns, 0.01);
let sum_after: f64 = ns.velocity.front.iter().copied().sum();
assert!((sum_after - sum_before).abs() >= 0.0); }
#[test]
fn water_sim_new_zero_height() {
let ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
assert!(ws.height.front.iter().all(|&v| v == 0.0));
}
#[test]
fn water_sim_add_disturbance() {
let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
ws.add_disturbance(8, 8, 1.0, 2.0);
let max_dev = ws.max_height_deviation();
assert!(max_dev > 0.0);
}
#[test]
fn water_sim_step_no_panic() {
let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
ws.add_disturbance(8, 8, 0.1, 2.0);
ws.step(0.01);
}
#[test]
fn water_sim_step_reduces_disturbance_over_time() {
let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
ws.damping = 0.9;
ws.add_disturbance(8, 8, 1.0, 1.0);
let _initial = ws.max_height_deviation();
for _ in 0..100 {
ws.step(0.001);
}
let final_dev = ws.max_height_deviation();
assert!(final_dev.is_finite());
}
#[test]
fn water_sim_nx_ny_correct() {
let ws = WaterSimGpu::new(10, 20, 0.05, 2.0, 9.81);
assert_eq!(ws.nx(), 10);
assert_eq!(ws.ny(), 20);
}
#[test]
fn water_sim_max_height_zero_initially() {
let ws = WaterSimGpu::new(8, 8, 0.1, 1.0, 9.81);
assert!((ws.max_height_deviation()).abs() < 1e-12);
}
#[test]
fn trilinear_sample_exact_cell() {
let mut data = vec![0.0_f64; 2 * 2 * 2];
data[0] = 5.0; let v = trilinear_sample(&data, 2, 2, 2, 1, 0.0, 0.0, 0.0, 0);
assert!((v - 5.0).abs() < 1e-12);
}
#[test]
fn trilinear_sample_midpoint() {
let data = vec![1.0_f64; 2 * 2 * 2]; let v = trilinear_sample(&data, 2, 2, 2, 1, 0.5, 0.5, 0.5, 0);
assert!((v - 1.0).abs() < 1e-12);
}
#[test]
fn d3q19_weights_sum_to_one() {
let sum: f64 = D3Q19_WEIGHTS.iter().sum();
assert!((sum - 1.0).abs() < 1e-12);
}
#[test]
fn d3q19_q_correct() {
assert_eq!(D3Q19_Q, 19);
}
}