#![allow(clippy::ptr_arg)]
use super::types::{
FlipParticle, GpuBoundaryBox, LbmCellType, LbmD2Q9, MacGrid, SphConfig, SphKernels, SphParticle,
};
pub(super) fn dot3(a: [f64; 3], b: [f64; 3]) -> f64 {
a[0] * b[0] + a[1] * b[1] + a[2] * b[2]
}
pub(super) fn sub3(a: [f64; 3], b: [f64; 3]) -> [f64; 3] {
[a[0] - b[0], a[1] - b[1], a[2] - b[2]]
}
pub(super) fn add3(a: [f64; 3], b: [f64; 3]) -> [f64; 3] {
[a[0] + b[0], a[1] + b[1], a[2] + b[2]]
}
pub(super) fn scale3(v: [f64; 3], s: f64) -> [f64; 3] {
[v[0] * s, v[1] * s, v[2] * s]
}
pub(super) fn length3(v: [f64; 3]) -> f64 {
dot3(v, v).sqrt()
}
pub(super) fn normalize3(v: [f64; 3]) -> [f64; 3] {
let len = length3(v);
if len < 1e-15 {
return [0.0; 3];
}
scale3(v, 1.0 / len)
}
pub(super) fn cross3(a: [f64; 3], b: [f64; 3]) -> [f64; 3] {
[
a[1] * b[2] - a[2] * b[1],
a[2] * b[0] - a[0] * b[2],
a[0] * b[1] - a[1] * b[0],
]
}
pub fn sph_compute_density(particles: &mut Vec<SphParticle>, config: &SphConfig) {
let n = particles.len();
let mut densities = vec![0.0f64; n];
for i in 0..n {
let mut rho = 0.0;
for j in 0..n {
let r_vec = sub3(particles[i].position, particles[j].position);
let r = length3(r_vec);
rho += particles[j].mass * SphKernels::poly6(r, config.h);
}
densities[i] = rho.max(1e-6);
}
for (i, p) in particles.iter_mut().enumerate() {
p.density = densities[i];
}
}
pub fn sph_compute_pressure(particles: &mut Vec<SphParticle>, config: &SphConfig) {
for p in particles.iter_mut() {
let ratio = p.density / config.rest_density;
p.pressure = config.pressure_k * (ratio.powi(7) - 1.0);
}
}
pub fn sph_compute_forces(particles: &mut Vec<SphParticle>, config: &SphConfig) {
let n = particles.len();
let mut forces = vec![[0.0f64; 3]; n];
for i in 0..n {
let mut f_pressure = [0.0f64; 3];
let mut f_viscosity = [0.0f64; 3];
for j in 0..n {
if i == j {
continue;
}
let r_vec = sub3(particles[i].position, particles[j].position);
let r = length3(r_vec);
if r > config.h || r < 1e-15 {
continue;
}
let pressure_factor = particles[i].pressure
/ (particles[i].density * particles[i].density)
+ particles[j].pressure / (particles[j].density * particles[j].density);
let grad = SphKernels::spiky_grad(r_vec, r, config.h);
f_pressure = add3(
f_pressure,
scale3(grad, -particles[j].mass * pressure_factor),
);
let v_diff = sub3(particles[j].velocity, particles[i].velocity);
let lap = SphKernels::viscosity_laplacian(r, config.h);
let visc_factor = config.viscosity * particles[j].mass * lap / particles[j].density;
f_viscosity = add3(f_viscosity, scale3(v_diff, visc_factor));
}
let f_gravity = scale3(config.gravity, particles[i].density);
forces[i] = add3(add3(f_pressure, f_viscosity), f_gravity);
}
for (i, p) in particles.iter_mut().enumerate() {
p.force = forces[i];
}
}
pub fn sph_integrate(particles: &mut Vec<SphParticle>, config: &SphConfig) {
for p in particles.iter_mut() {
let accel = scale3(p.force, 1.0 / p.density.max(1e-6));
p.velocity = add3(p.velocity, scale3(accel, config.dt));
p.position = add3(p.position, scale3(p.velocity, config.dt));
}
}
pub fn sph_step(particles: &mut Vec<SphParticle>, config: &SphConfig) {
sph_compute_density(particles, config);
sph_compute_pressure(particles, config);
sph_compute_forces(particles, config);
sph_integrate(particles, config);
}
pub const D3Q27_Q: usize = 27;
pub const D2Q9_Q: usize = 9;
pub const D2Q9_CX: [i32; 9] = [0, 1, 0, -1, 0, 1, -1, -1, 1];
pub const D2Q9_CY: [i32; 9] = [0, 0, 1, 0, -1, 1, 1, -1, -1];
pub const D2Q9_W: [f64; 9] = [
4.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
];
pub const D2Q9_OPP: [usize; 9] = [0, 3, 4, 1, 2, 7, 8, 5, 6];
pub fn advect_scalar(phi: &[f64], grid: &MacGrid, dt: f64, gravity: [f64; 3]) -> Vec<f64> {
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let dx = grid.dx;
let mut phi_new = vec![0.0f64; nx * ny * nz];
for k in 0..nz {
for j in 0..ny {
for i in 0..nx {
let xc = (i as f64 + 0.5) * dx;
let yc = (j as f64 + 0.5) * dx;
let zc = (k as f64 + 0.5) * dx;
let u0 = grid.interp_u(xc, yc, zc);
let x_mid = xc - 0.5 * dt * (u0 + gravity[0]);
let y_mid = yc - 0.5 * dt * gravity[1];
let z_mid = zc - 0.5 * dt * gravity[2];
let u_mid = grid.interp_u(x_mid, y_mid, z_mid);
let x_back = xc - dt * (u_mid + gravity[0]);
let y_back = yc - dt * gravity[1];
let z_back = zc - dt * gravity[2];
let ix = (x_back / dx - 0.5).floor() as isize;
let iy = (y_back / dx - 0.5).floor() as isize;
let iz = (z_back / dx - 0.5).floor() as isize;
let ii = ix.clamp(0, nx as isize - 1) as usize;
let jj = iy.clamp(0, ny as isize - 1) as usize;
let kk = iz.clamp(0, nz as isize - 1) as usize;
phi_new[k * nx * ny + j * nx + i] = phi[kk * nx * ny + jj * nx + ii];
}
}
}
phi_new
}
pub fn compute_vorticity(grid: &MacGrid) -> Vec<[f64; 3]> {
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let inv2dx = 1.0 / (2.0 * grid.dx);
let mut vorticity = vec![[0.0f64; 3]; nx * ny * nz];
for k in 1..nz - 1 {
for j in 1..ny - 1 {
for i in 1..nx - 1 {
let dwdy = (grid.get_w(i, j + 1, k) - grid.get_w(i, j - 1, k)) * inv2dx;
let dvdz = (grid.get_v(i, j, k + 1) - grid.get_v(i, j, k - 1)) * inv2dx;
let dudz = (grid.get_u(i, j, k + 1) - grid.get_u(i, j, k - 1)) * inv2dx;
let dwdx = (grid.get_w(i + 1, j, k) - grid.get_w(i - 1, j, k)) * inv2dx;
let dvdx = (grid.get_v(i + 1, j, k) - grid.get_v(i - 1, j, k)) * inv2dx;
let dudy = (grid.get_u(i, j + 1, k) - grid.get_u(i, j - 1, k)) * inv2dx;
let idx = k * nx * ny + j * nx + i;
vorticity[idx] = [dwdy - dvdz, dudz - dwdx, dvdx - dudy];
}
}
}
vorticity
}
pub fn vorticity_confinement(grid: &mut MacGrid, vorticity: &[[f64; 3]], epsilon: f64, dt: f64) {
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let inv2dx = 1.0 / (2.0 * grid.dx);
for k in 1..nz - 1 {
for j in 1..ny - 1 {
for i in 1..nx - 1 {
let mag = |x: usize, y: usize, z: usize| {
let idx = z * nx * ny + y * nx + x;
length3(vorticity[idx])
};
let gx = (mag(i + 1, j, k) - mag(i - 1, j, k)) * inv2dx;
let gy = (mag(i, j + 1, k) - mag(i, j - 1, k)) * inv2dx;
let gz = (mag(i, j, k + 1) - mag(i, j, k - 1)) * inv2dx;
let grad_len = (gx * gx + gy * gy + gz * gz).sqrt();
if grad_len < 1e-12 {
continue;
}
let n_vec = [gx / grad_len, gy / grad_len, gz / grad_len];
let idx = k * nx * ny + j * nx + i;
let omega = vorticity[idx];
let force = cross3(n_vec, omega);
let force = scale3(force, epsilon);
if i < nx {
let u = grid.get_u(i, j, k);
grid.set_u(i, j, k, u + dt * force[0]);
}
if j < ny {
let v = grid.get_v(i, j, k);
grid.set_v(i, j, k, v + dt * force[1]);
}
if k < nz {
let w = grid.get_w(i, j, k);
grid.set_w(i, j, k, w + dt * force[2]);
}
}
}
}
}
pub fn surface_tension_csf(
phi: &[f64],
nx: usize,
ny: usize,
nz: usize,
dx: f64,
sigma: f64,
) -> Vec<[f64; 3]> {
let inv_dx = 1.0 / dx;
let inv2dx = 0.5 * inv_dx;
let n = nx * ny * nz;
let mut forces = vec![[0.0f64; 3]; n];
let idx = |i: usize, j: usize, k: usize| k * nx * ny + j * nx + i;
for k in 1..nz - 1 {
for j in 1..ny - 1 {
for i in 1..nx - 1 {
let c = idx(i, j, k);
let gx = (phi[idx(i + 1, j, k)] - phi[idx(i - 1, j, k)]) * inv2dx;
let gy = (phi[idx(i, j + 1, k)] - phi[idx(i, j - 1, k)]) * inv2dx;
let gz = (phi[idx(i, j, k + 1)] - phi[idx(i, j, k - 1)]) * inv2dx;
let grad_mag = (gx * gx + gy * gy + gz * gz).sqrt();
if grad_mag < 1e-12 {
continue;
}
let nx_n = gx / grad_mag;
let ny_n = gy / grad_mag;
let nz_n = gz / grad_mag;
let phi_xx = (phi[idx(i + 1, j, k)] - 2.0 * phi[c] + phi[idx(i - 1, j, k)])
* inv_dx
* inv_dx;
let phi_yy = (phi[idx(i, j + 1, k)] - 2.0 * phi[c] + phi[idx(i, j - 1, k)])
* inv_dx
* inv_dx;
let phi_zz = (phi[idx(i, j, k + 1)] - 2.0 * phi[c] + phi[idx(i, j, k - 1)])
* inv_dx
* inv_dx;
let kappa = -(phi_xx + phi_yy + phi_zz) / grad_mag;
let f_scale = sigma * kappa;
forces[c] = [f_scale * nx_n, f_scale * ny_n, f_scale * nz_n];
}
}
}
forces
}
pub fn p2g_transfer(particles: &[FlipParticle], grid: &mut MacGrid) {
let dx = grid.dx;
let inv_dx = 1.0 / dx;
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let mut u_weight = vec![0.0f64; (nx + 1) * ny * nz];
let mut v_weight = vec![0.0f64; nx * (ny + 1) * nz];
let mut w_weight = vec![0.0f64; nx * ny * (nz + 1)];
let mut u_num = vec![0.0f64; (nx + 1) * ny * nz];
let mut v_num = vec![0.0f64; nx * (ny + 1) * nz];
let mut w_num = vec![0.0f64; nx * ny * (nz + 1)];
for p in particles {
let [px, py, pz] = p.position;
let iu = (px * inv_dx).floor() as isize;
let ju = (py * inv_dx - 0.5).floor() as isize;
let ku = (pz * inv_dx - 0.5).floor() as isize;
let fxu = px * inv_dx - iu as f64;
let fyu = py * inv_dx - 0.5 - ju as f64;
let fzu = pz * inv_dx - 0.5 - ku as f64;
for dz in 0..2 {
for dy in 0..2 {
for dx_off in 0..2 {
let ni = (iu + dx_off as isize).clamp(0, nx as isize) as usize;
let nj = (ju + dy as isize).clamp(0, ny as isize - 1) as usize;
let nk = (ku + dz as isize).clamp(0, nz as isize - 1) as usize;
let wx = if dx_off == 0 { 1.0 - fxu } else { fxu };
let wy = if dy == 0 { 1.0 - fyu } else { fyu };
let wz = if dz == 0 { 1.0 - fzu } else { fzu };
let w = wx * wy * wz;
let uidx = nk * (nx + 1) * ny + nj * (nx + 1) + ni;
u_num[uidx] += w * p.velocity[0];
u_weight[uidx] += w;
}
}
}
let iv = (px * inv_dx - 0.5).floor() as isize;
let jv = (py * inv_dx).floor() as isize;
let kv = (pz * inv_dx - 0.5).floor() as isize;
let fxv = px * inv_dx - 0.5 - iv as f64;
let fyv = py * inv_dx - jv as f64;
let fzv = pz * inv_dx - 0.5 - kv as f64;
for dz in 0..2 {
for dy in 0..2 {
for dx_off in 0..2 {
let ni = (iv + dx_off as isize).clamp(0, nx as isize - 1) as usize;
let nj = (jv + dy as isize).clamp(0, ny as isize) as usize;
let nk = (kv + dz as isize).clamp(0, nz as isize - 1) as usize;
let wx = if dx_off == 0 { 1.0 - fxv } else { fxv };
let wy = if dy == 0 { 1.0 - fyv } else { fyv };
let wz = if dz == 0 { 1.0 - fzv } else { fzv };
let wt = wx * wy * wz;
let vidx = nk * nx * (ny + 1) + nj * nx + ni;
v_num[vidx] += wt * p.velocity[1];
v_weight[vidx] += wt;
}
}
}
let iw = (px * inv_dx - 0.5).floor() as isize;
let jw = (py * inv_dx - 0.5).floor() as isize;
let kw = (pz * inv_dx).floor() as isize;
let fxw = px * inv_dx - 0.5 - iw as f64;
let fyw = py * inv_dx - 0.5 - jw as f64;
let fzw = pz * inv_dx - kw as f64;
for dz in 0..2 {
for dy in 0..2 {
for dx_off in 0..2 {
let ni = (iw + dx_off as isize).clamp(0, nx as isize - 1) as usize;
let nj = (jw + dy as isize).clamp(0, ny as isize - 1) as usize;
let nk = (kw + dz as isize).clamp(0, nz as isize) as usize;
let wx = if dx_off == 0 { 1.0 - fxw } else { fxw };
let wy = if dy == 0 { 1.0 - fyw } else { fyw };
let wz = if dz == 0 { 1.0 - fzw } else { fzw };
let wt = wx * wy * wz;
let widx = nk * nx * ny + nj * nx + ni;
w_num[widx] += wt * p.velocity[2];
w_weight[widx] += wt;
}
}
}
}
for i in 0..u_num.len() {
grid.u[i] = if u_weight[i] > 1e-15 {
u_num[i] / u_weight[i]
} else {
0.0
};
}
for i in 0..v_num.len() {
grid.v[i] = if v_weight[i] > 1e-15 {
v_num[i] / v_weight[i]
} else {
0.0
};
}
for i in 0..w_num.len() {
grid.w[i] = if w_weight[i] > 1e-15 {
w_num[i] / w_weight[i]
} else {
0.0
};
}
}
pub fn g2p_transfer(
particles: &mut [FlipParticle],
grid_new: &MacGrid,
grid_old: &MacGrid,
flip_ratio: f64,
) {
for p in particles.iter_mut() {
let [px, py, pz] = p.position;
let dx = grid_new.dx;
let u_new = grid_new.interp_u(px, py, pz);
let u_old = grid_old.interp_u(px, py, pz);
let pic_vel = [u_new, 0.0, 0.0];
let flip_delta = [u_new - u_old, 0.0, 0.0];
let flip_vel = add3(p.velocity, flip_delta);
p.velocity = [
flip_ratio * flip_vel[0] + (1.0 - flip_ratio) * pic_vel[0],
p.velocity[1],
p.velocity[2],
];
p.position = add3(p.position, scale3(p.velocity, dx));
}
}
pub fn gpu_sph_density_parallel(particles: &mut Vec<SphParticle>, config: &SphConfig) {
let n = particles.len();
let mut densities = vec![0.0f64; n];
for i in 0..n {
let mut rho = 0.0;
for j in 0..n {
let r_vec = sub3(particles[i].position, particles[j].position);
let r = length3(r_vec);
rho += particles[j].mass * SphKernels::poly6(r, config.h);
}
densities[i] = rho.max(1e-6);
}
for (i, p) in particles.iter_mut().enumerate() {
p.density = densities[i];
}
}
pub fn gpu_jacobi_pressure_solve(grid: &mut MacGrid, rho: f64, dt: f64, iterations: usize) {
grid.compute_divergence();
let scale = rho * grid.dx * grid.dx / dt;
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let mut p_new = grid.p.clone();
for _ in 0..iterations {
for k in 0..nz {
for j in 0..ny {
for i in 0..nx {
let idx = k * nx * ny + j * nx + i;
if grid.flags[idx] != 1 {
continue;
}
let mut nb_sum = 0.0;
let mut nb_cnt = 0u32;
macro_rules! nb {
($ii:expr, $jj:expr, $kk:expr) => {{
nb_sum += grid.p[$kk * nx * ny + $jj * nx + $ii];
nb_cnt += 1;
}};
}
if i + 1 < nx {
nb!(i + 1, j, k);
}
if i > 0 {
nb!(i - 1, j, k);
}
if j + 1 < ny {
nb!(i, j + 1, k);
}
if j > 0 {
nb!(i, j - 1, k);
}
if k + 1 < nz {
nb!(i, j, k + 1);
}
if k > 0 {
nb!(i, j, k - 1);
}
if nb_cnt > 0 {
p_new[idx] = (nb_sum - scale * grid.div[idx]) / nb_cnt as f64;
}
}
}
}
grid.p.copy_from_slice(&p_new);
}
}
pub fn gpu_lbm_bgk_collide(lbm: &mut LbmD2Q9) {
let nx = lbm.nx;
let ny = lbm.ny;
let inv_tau = lbm.inv_tau;
let mut updates: Vec<(usize, usize, usize, f64)> = Vec::with_capacity(nx * ny * D2Q9_Q);
for y in 0..ny {
for x in 0..nx {
if lbm.cell_type[y * nx + x] == LbmCellType::Solid {
continue;
}
let rho = lbm.density(x, y);
let [ux, uy] = lbm.velocity(x, y);
for q in 0..D2Q9_Q {
let f_eq = LbmD2Q9::f_equilibrium(rho, ux, uy, q);
let f_old = lbm.get_f(x, y, q);
let f_new = f_old - inv_tau * (f_old - f_eq);
updates.push((x, y, q, f_new));
}
}
}
for (x, y, q, val) in updates {
lbm.set_f(x, y, q, val);
}
}
pub fn morton_expand_bits(mut v: u32) -> u32 {
v &= 0x000003ff;
v = (v | (v << 16)) & 0xff0000ff;
v = (v | (v << 8)) & 0x0300f00f;
v = (v | (v << 4)) & 0x030c30c3;
v = (v | (v << 2)) & 0x09249249;
v
}
pub fn morton_encode_3d(x: u32, y: u32, z: u32) -> u32 {
morton_expand_bits(x) | (morton_expand_bits(y) << 1) | (morton_expand_bits(z) << 2)
}
pub fn morton_sort_particles(particles: &mut Vec<SphParticle>, domain_size: [f64; 3]) {
let bits = 1024u32;
let mut keyed: Vec<(u32, SphParticle)> = particles
.drain(..)
.map(|p| {
let xi = ((p.position[0] / domain_size[0]).clamp(0.0, 1.0) * (bits - 1) as f64) as u32;
let yi = ((p.position[1] / domain_size[1]).clamp(0.0, 1.0) * (bits - 1) as f64) as u32;
let zi = ((p.position[2] / domain_size[2]).clamp(0.0, 1.0) * (bits - 1) as f64) as u32;
(morton_encode_3d(xi, yi, zi), p)
})
.collect();
keyed.sort_by_key(|(code, _)| *code);
*particles = keyed.into_iter().map(|(_, p)| p).collect();
}
pub fn gpu_particle_integrate_euler(particles: &mut Vec<SphParticle>, dt: f64) {
for p in particles.iter_mut() {
let inv_rho = 1.0 / p.density.max(1e-6);
for d in 0..3 {
let accel = p.force[d] * inv_rho;
p.velocity[d] += accel * dt;
p.position[d] += p.velocity[d] * dt;
}
}
}
pub fn gpu_particle_integrate_verlet(particles: &mut Vec<SphParticle>, dt: f64, _dt_prev: f64) {
for p in particles.iter_mut() {
let inv_rho = 1.0 / p.density.max(1e-6);
for d in 0..3 {
let accel = p.force[d] * inv_rho;
p.position[d] += p.velocity[d] * dt + 0.5 * accel * dt * dt;
p.velocity[d] += accel * dt;
}
}
}
pub fn gpu_apply_boundary_box(particles: &mut Vec<SphParticle>, bounds: &GpuBoundaryBox) {
for p in particles.iter_mut() {
for d in 0..3 {
if p.position[d] < bounds.min[d] {
p.position[d] = bounds.min[d];
p.velocity[d] = p.velocity[d].abs() * bounds.restitution;
}
if p.position[d] > bounds.max[d] {
p.position[d] = bounds.max[d];
p.velocity[d] = -p.velocity[d].abs() * bounds.restitution;
}
}
}
}
pub fn gpu_reduce_kinetic_energy(particles: &[SphParticle], mass: f64) -> f64 {
particles.iter().fold(0.0, |acc, p| {
let v2 = p.velocity[0] * p.velocity[0]
+ p.velocity[1] * p.velocity[1]
+ p.velocity[2] * p.velocity[2];
acc + 0.5 * mass * v2
})
}
pub fn gpu_reduce_momentum(particles: &[SphParticle], mass: f64) -> [f64; 3] {
particles.iter().fold([0.0f64; 3], |acc, p| {
[
acc[0] + mass * p.velocity[0],
acc[1] + mass * p.velocity[1],
acc[2] + mass * p.velocity[2],
]
})
}
pub fn gpu_advect_2d(
field: &[f64],
vel: &[[f64; 2]],
nx: usize,
ny: usize,
dx: f64,
dt: f64,
) -> Vec<f64> {
let mut out = vec![0.0f64; nx * ny];
let inv_dx = 1.0 / dx;
let sample = |x: f64, y: f64| -> f64 {
let ix = (x * inv_dx - 0.5).floor() as isize;
let iy = (y * inv_dx - 0.5).floor() as isize;
let fx = x * inv_dx - 0.5 - ix as f64;
let fy = y * inv_dx - 0.5 - iy as f64;
let ci = |v: isize| v.clamp(0, nx as isize - 1) as usize;
let cj = |v: isize| v.clamp(0, ny as isize - 1) as usize;
let f00 = field[cj(iy) * nx + ci(ix)];
let f10 = field[cj(iy) * nx + ci(ix + 1)];
let f01 = field[cj(iy + 1) * nx + ci(ix)];
let f11 = field[cj(iy + 1) * nx + ci(ix + 1)];
let f0 = f00 + fx * (f10 - f00);
let f1 = f01 + fx * (f11 - f01);
f0 + fy * (f1 - f0)
};
for j in 0..ny {
for i in 0..nx {
let xc = (i as f64 + 0.5) * dx;
let yc = (j as f64 + 0.5) * dx;
let idx = j * nx + i;
let [vx, vy] = vel[idx];
let xb = xc - vx * dt;
let yb = yc - vy * dt;
out[idx] = sample(xb, yb);
}
}
out
}
pub fn gpu_pressure_poisson_jacobi_2d(
pressure: &mut Vec<f64>,
div: &[f64],
nx: usize,
ny: usize,
dx: f64,
iterations: usize,
) {
let dx2 = dx * dx;
let mut p_new = pressure.clone();
for _ in 0..iterations {
for j in 0..ny {
for i in 0..nx {
let idx = j * nx + i;
let mut nb = 0.0;
let mut cnt = 0u32;
if i + 1 < nx {
nb += pressure[j * nx + i + 1];
cnt += 1;
}
if i > 0 {
nb += pressure[j * nx + i - 1];
cnt += 1;
}
if j + 1 < ny {
nb += pressure[(j + 1) * nx + i];
cnt += 1;
}
if j > 0 {
nb += pressure[(j - 1) * nx + i];
cnt += 1;
}
if cnt > 0 {
p_new[idx] = (nb - dx2 * div[idx]) / cnt as f64;
}
}
}
pressure.copy_from_slice(&p_new);
}
}