#![allow(dead_code)]
#![allow(clippy::too_many_arguments)]
use crate::compute::WgpuBufferHandle;
#[cfg(feature = "wgpu-backend")]
use {crate::compute::WgpuInitError, crate::compute::wgpu_backend::real::WgpuBackendReal, wgpu};
pub const D3Q19_EX: [i32; 19] = [0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0];
pub const D3Q19_EY: [i32; 19] = [0, 0, 0, 1, -1, 0, 0, 1, 1, -1, -1, 0, 0, 0, 0, 1, -1, 1, -1];
pub const D3Q19_EZ: [i32; 19] = [0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, 1, -1, -1];
pub const D3Q19_W: [f64; 19] = [
1.0 / 3.0, 1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0,
1.0 / 18.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0, 1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
];
pub const D3Q19_OPP: [usize; 19] = [
0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 14, 13, 12, 11, 18, 17, 16, 15,
];
#[derive(Debug, Clone)]
pub struct LbmConfig {
pub nx: usize,
pub ny: usize,
pub nz: usize,
pub tau: f64,
pub rho0: f64,
pub force_x: f64,
pub force_y: f64,
pub force_z: f64,
}
impl Default for LbmConfig {
fn default() -> Self {
Self {
nx: 16,
ny: 16,
nz: 16,
tau: 0.6,
rho0: 1.0,
force_x: 0.0,
force_y: 0.0,
force_z: 0.0,
}
}
}
impl LbmConfig {
pub fn viscosity(&self) -> f64 {
(1.0 / 3.0) * (self.tau - 0.5)
}
pub fn n_cells(&self) -> usize {
self.nx * self.ny * self.nz
}
}
#[cfg(feature = "wgpu-backend")]
struct LbmGpuState {
backend: WgpuBackendReal,
f_buf_a: WgpuBufferHandle,
f_buf_b: WgpuBufferHandle,
params_buf: WgpuBufferHandle,
b_is_input: bool,
}
#[cfg(feature = "wgpu-backend")]
impl LbmGpuState {
fn try_new(sim: &LbmSimulation) -> Result<Self, WgpuInitError> {
let mut backend = WgpuBackendReal::try_new()?;
let nx = sim.config.nx as u32;
let ny = sim.config.ny as u32;
let nz = sim.config.nz as u32;
let nc = sim.config.n_cells();
let f_bytes = (19 * nc * 4) as u64;
let f_buf_a = backend.create_buffer_storage(f_bytes);
let f_buf_b = backend.create_buffer_storage(f_bytes);
let params_buf = backend.create_buffer_storage(20_u64);
let omega = (1.0_f64 / sim.config.tau) as f32;
let params_data: [u32; 5] = [nx, ny, nz, omega.to_bits(), 0u32];
backend.queue_write_buffer_raw(¶ms_buf, bytemuck::cast_slice(¶ms_data));
let f_flat: Vec<f32> = flatten_soa_to_f32(&sim.f, nc);
backend.queue_write_buffer_f32(&f_buf_a, &f_flat);
Ok(Self {
backend,
f_buf_a,
f_buf_b,
params_buf,
b_is_input: false,
})
}
fn input_buf(&self) -> WgpuBufferHandle {
if self.b_is_input {
self.f_buf_b
} else {
self.f_buf_a
}
}
fn output_buf(&self) -> WgpuBufferHandle {
if self.b_is_input {
self.f_buf_a
} else {
self.f_buf_b
}
}
}
fn flatten_soa_to_f32(f: &[Vec<f64>], nc: usize) -> Vec<f32> {
let mut flat = Vec::with_capacity(19 * nc);
for dir in f.iter().take(19) {
for &val in dir.iter().take(nc) {
flat.push(val as f32);
}
}
flat
}
fn unflatten_f32_to_soa(flat: &[f32], nc: usize) -> Vec<Vec<f64>> {
let mut f = Vec::with_capacity(19);
for q in 0..19 {
let mut dir = Vec::with_capacity(nc);
for cell in 0..nc {
let idx = q * nc + cell;
dir.push(if idx < flat.len() {
flat[idx] as f64
} else {
0.0
});
}
f.push(dir);
}
f
}
pub struct LbmSimulation {
pub config: LbmConfig,
pub f: Vec<Vec<f64>>,
f_tmp: Vec<Vec<f64>>,
pub solid: Vec<bool>,
pub lid_vel_x: f64,
pub lid_vel_y: f64,
pub lid_vel_z: f64,
pub step_count: u64,
#[cfg(feature = "wgpu-backend")]
gpu_state: Option<LbmGpuState>,
#[cfg(feature = "wgpu-backend")]
gpu_dirty: bool,
}
impl LbmSimulation {
pub fn new(config: LbmConfig) -> Self {
let nc = config.n_cells();
let rho0 = config.rho0;
let mut f = Vec::with_capacity(19);
let mut f_tmp = Vec::with_capacity(19);
for &wi in D3Q19_W.iter() {
f.push(vec![wi * rho0; nc]);
f_tmp.push(vec![0.0; nc]);
}
let mut solid = vec![false; nc];
let (nx, ny, nz) = (config.nx, config.ny, config.nz);
for z in 0..nz {
for y in 0..ny {
for x in 0..nx {
let idx = x + nx * (y + ny * z);
if y == 0 || y == ny - 1 || z == 0 || z == nz - 1 {
solid[idx] = true;
}
}
}
}
Self {
config,
f,
f_tmp,
solid,
lid_vel_x: 0.0,
lid_vel_y: 0.0,
lid_vel_z: 0.0,
step_count: 0,
#[cfg(feature = "wgpu-backend")]
gpu_state: None,
#[cfg(feature = "wgpu-backend")]
gpu_dirty: false,
}
}
pub fn set_lid_velocity(&mut self, ux: f64, uy: f64, uz: f64) {
self.lid_vel_x = ux;
self.lid_vel_y = uy;
self.lid_vel_z = uz;
}
pub fn has_gpu(&self) -> bool {
#[cfg(feature = "wgpu-backend")]
{
self.gpu_state.is_some()
}
#[cfg(not(feature = "wgpu-backend"))]
{
false
}
}
pub fn step(&mut self) {
#[cfg(feature = "wgpu-backend")]
{
self.step_gpu();
}
#[cfg(not(feature = "wgpu-backend"))]
{
self.step_cpu();
}
self.step_count += 1;
}
#[cfg(feature = "wgpu-backend")]
fn step_gpu(&mut self) {
if self.gpu_state.is_none() {
match LbmGpuState::try_new(self) {
Ok(state) => {
self.gpu_state = Some(state);
}
Err(e) => {
eprintln!("LBM GPU init failed ({e}), falling back to CPU");
self.step_cpu();
return;
}
}
}
let state = self
.gpu_state
.as_mut()
.expect("LbmGpuState must be initialised");
let input = state.input_buf();
let output = state.output_buf();
let nx = self.config.nx as u32;
let ny = self.config.ny as u32;
let nz = self.config.nz as u32;
let wg_x = nx.div_ceil(8);
let wg_y = ny.div_ceil(8);
let wg_z = nz.div_ceil(8);
const LBM_BGK_D3Q19_WGSL: &str = include_str!("shaders/lbm_bgk_d3q19.wgsl");
state
.backend
.dispatch_wgsl(
LBM_BGK_D3Q19_WGSL,
"main",
&[
(
state.params_buf,
wgpu::BufferBindingType::Storage { read_only: true },
),
(input, wgpu::BufferBindingType::Storage { read_only: true }),
(
output,
wgpu::BufferBindingType::Storage { read_only: false },
),
],
[wg_x, wg_y, wg_z],
)
.expect("LBM BGK D3Q19 dispatch_wgsl failed");
state.b_is_input = !state.b_is_input;
self.gpu_dirty = true;
}
#[cfg(feature = "wgpu-backend")]
fn sync_from_gpu(&mut self) {
if !self.gpu_dirty {
return;
}
let Some(state) = self.gpu_state.as_ref() else {
return;
};
let nc = self.config.n_cells();
let read_buf = state.input_buf();
let flat = state.backend.read_buffer_f32(read_buf);
self.f = unflatten_f32_to_soa(&flat, nc);
self.gpu_dirty = false;
}
fn step_cpu(&mut self) {
let nc = self.config.n_cells();
let nx = self.config.nx;
let ny = self.config.ny;
let nz = self.config.nz;
let tau = self.config.tau;
let rho0 = self.config.rho0;
let omega = 1.0 / tau;
for cell in 0..nc {
if self.solid[cell] {
continue;
}
let mut rho = 0.0_f64;
let mut ux = 0.0_f64;
let mut uy = 0.0_f64;
let mut uz = 0.0_f64;
for i in 0..19 {
let fi = self.f[i][cell];
rho += fi;
ux += fi * D3Q19_EX[i] as f64;
uy += fi * D3Q19_EY[i] as f64;
uz += fi * D3Q19_EZ[i] as f64;
}
ux = (ux + self.config.force_x * 0.5) / rho;
uy = (uy + self.config.force_y * 0.5) / rho;
uz = (uz + self.config.force_z * 0.5) / rho;
let u2 = ux * ux + uy * uy + uz * uz;
for i in 0..19 {
let eu =
D3Q19_EX[i] as f64 * ux + D3Q19_EY[i] as f64 * uy + D3Q19_EZ[i] as f64 * uz;
let feq = D3Q19_W[i] * rho * (1.0 + 3.0 * eu + 4.5 * eu * eu - 1.5 * u2);
self.f[i][cell] += omega * (feq - self.f[i][cell]);
}
}
for i in 0..19 {
for cell in 0..nc {
self.f_tmp[i][cell] = if self.solid[cell] {
self.f[i][cell]
} else {
0.0
};
}
}
for z in 0..nz {
for y in 0..ny {
for x in 0..nx {
let src = x + nx * (y + ny * z);
if self.solid[src] {
continue;
}
for i in 0..19 {
let dx = D3Q19_EX[i];
let dy = D3Q19_EY[i];
let dz = D3Q19_EZ[i];
let nx2 = nx as i32;
let ny2 = ny as i32;
let nz2 = nz as i32;
let xd = ((x as i32 + dx).rem_euclid(nx2)) as usize;
let yd = ((y as i32 + dy).rem_euclid(ny2)) as usize;
let zd = ((z as i32 + dz).rem_euclid(nz2)) as usize;
let dst = xd + nx * (yd + ny * zd);
if self.solid[dst] {
self.f_tmp[D3Q19_OPP[i]][src] += self.f[i][src];
} else {
self.f_tmp[i][dst] += self.f[i][src];
}
}
}
}
}
std::mem::swap(&mut self.f, &mut self.f_tmp);
let ux_lid = self.lid_vel_x;
let uy_lid = self.lid_vel_y;
let uz_lid = self.lid_vel_z;
if ux_lid != 0.0 || uy_lid != 0.0 || uz_lid != 0.0 {
let ny_m1 = ny - 1;
for z in 1..nz - 1 {
for x in 1..nx - 1 {
let cell = x + nx * (ny_m1 + ny * z);
let rho = rho0;
let u2 = ux_lid * ux_lid + uy_lid * uy_lid + uz_lid * uz_lid;
for i in 0..19 {
let eu = D3Q19_EX[i] as f64 * ux_lid
+ D3Q19_EY[i] as f64 * uy_lid
+ D3Q19_EZ[i] as f64 * uz_lid;
self.f[i][cell] =
D3Q19_W[i] * rho * (1.0 + 3.0 * eu + 4.5 * eu * eu - 1.5 * u2);
}
}
}
}
}
pub fn cell_macro(&mut self, x: usize, y: usize, z: usize) -> (f64, [f64; 3]) {
#[cfg(feature = "wgpu-backend")]
self.sync_from_gpu();
let nc = x + self.config.nx * (y + self.config.ny * z);
let mut rho = 0.0_f64;
let mut u = [0.0_f64; 3];
for i in 0..19 {
let fi = self.f[i][nc];
rho += fi;
u[0] += fi * D3Q19_EX[i] as f64;
u[1] += fi * D3Q19_EY[i] as f64;
u[2] += fi * D3Q19_EZ[i] as f64;
}
if rho > 1e-10 {
u[0] /= rho;
u[1] /= rho;
u[2] /= rho;
}
(rho, u)
}
pub fn mean_velocity(&mut self) -> (f64, f64, f64) {
#[cfg(feature = "wgpu-backend")]
self.sync_from_gpu();
let nc = self.config.n_cells();
let fluid: Vec<usize> = (0..nc).filter(|&i| !self.solid[i]).collect();
if fluid.is_empty() {
return (0.0, 0.0, 0.0);
}
let fx_half = self.config.force_x * 0.5;
let fy_half = self.config.force_y * 0.5;
let fz_half = self.config.force_z * 0.5;
let mut ux = 0.0_f64;
let mut uy = 0.0_f64;
let mut uz = 0.0_f64;
for &cell in &fluid {
let mut rho = 0.0;
let mut lu = [0.0_f64; 3];
for i in 0..19 {
let fi = self.f[i][cell];
rho += fi;
lu[0] += fi * D3Q19_EX[i] as f64;
lu[1] += fi * D3Q19_EY[i] as f64;
lu[2] += fi * D3Q19_EZ[i] as f64;
}
if rho > 1e-10 {
ux += (lu[0] + fx_half) / rho;
uy += (lu[1] + fy_half) / rho;
uz += (lu[2] + fz_half) / rho;
}
}
let n = fluid.len() as f64;
(ux / n, uy / n, uz / n)
}
pub fn mean_density(&mut self) -> f64 {
#[cfg(feature = "wgpu-backend")]
self.sync_from_gpu();
let nc = self.config.n_cells();
let (sum, count) = (0..nc)
.filter(|&i| !self.solid[i])
.map(|i| (0..19_usize).map(|d| self.f[d][i]).sum::<f64>())
.fold((0.0_f64, 0_usize), |(s, c), rho| (s + rho, c + 1));
if count == 0 { 0.0 } else { sum / count as f64 }
}
pub fn max_velocity_magnitude(&mut self) -> f64 {
#[cfg(feature = "wgpu-backend")]
self.sync_from_gpu();
let nc = self.config.n_cells();
let mut max_mag = 0.0_f64;
for cell in 0..nc {
if self.solid[cell] {
continue;
}
let mut rho = 0.0_f64;
let mut u = [0.0_f64; 3];
for i in 0..19 {
let fi = self.f[i][cell];
rho += fi;
u[0] += fi * D3Q19_EX[i] as f64;
u[1] += fi * D3Q19_EY[i] as f64;
u[2] += fi * D3Q19_EZ[i] as f64;
}
if rho > 1e-10 {
let mag =
((u[0] / rho).powi(2) + (u[1] / rho).powi(2) + (u[2] / rho).powi(2)).sqrt();
max_mag = max_mag.max(mag);
}
}
max_mag
}
}
pub struct LbmGpuSolver {
pub nx: u32,
pub ny: u32,
pub nz: u32,
pub omega: f32,
pub step_count: u64,
inner: LbmGpuSolverInner,
}
enum LbmGpuSolverInner {
Cpu {
sim: LbmSimulation,
},
#[cfg(feature = "wgpu-backend")]
Gpu {
backend: crate::compute::wgpu_backend::real::WgpuBackendReal,
params_buf: crate::compute::WgpuBufferHandle,
f_buf_a: crate::compute::WgpuBufferHandle,
f_buf_b: crate::compute::WgpuBufferHandle,
current_b_is_input: bool,
},
}
#[cfg(feature = "wgpu-backend")]
const LBM_BGK_D3Q19_WGSL: &str = include_str!("shaders/lbm_bgk_d3q19.wgsl");
impl LbmGpuSolver {
pub fn new(nx: u32, ny: u32, nz: u32, omega: f32) -> Self {
#[cfg(feature = "wgpu-backend")]
{
use crate::compute::wgpu_backend::real::WgpuBackendReal;
if let Ok(backend) = WgpuBackendReal::try_new() {
return Self::new_gpu(backend, nx, ny, nz, omega);
}
}
let cfg = LbmConfig {
nx: nx as usize,
ny: ny as usize,
nz: nz as usize,
tau: if omega > 0.0 { 1.0 / omega as f64 } else { 0.6 },
..LbmConfig::default()
};
Self {
nx,
ny,
nz,
omega,
step_count: 0,
inner: LbmGpuSolverInner::Cpu {
sim: LbmSimulation::new(cfg),
},
}
}
pub fn new_cpu(nx: u32, ny: u32, nz: u32, omega: f32) -> Self {
let cfg = LbmConfig {
nx: nx as usize,
ny: ny as usize,
nz: nz as usize,
tau: if omega > 0.0 { 1.0 / omega as f64 } else { 0.6 },
..LbmConfig::default()
};
Self {
nx,
ny,
nz,
omega,
step_count: 0,
inner: LbmGpuSolverInner::Cpu {
sim: LbmSimulation::new(cfg),
},
}
}
pub fn is_gpu(&self) -> bool {
match &self.inner {
LbmGpuSolverInner::Cpu { .. } => false,
#[cfg(feature = "wgpu-backend")]
LbmGpuSolverInner::Gpu { .. } => true,
}
}
pub fn step(&mut self) -> Result<(), crate::GpuError> {
match &mut self.inner {
LbmGpuSolverInner::Cpu { sim } => {
sim.step();
self.step_count += 1;
Ok(())
}
#[cfg(feature = "wgpu-backend")]
LbmGpuSolverInner::Gpu {
backend,
params_buf,
f_buf_a,
f_buf_b,
current_b_is_input,
} => {
let (input, output) = if *current_b_is_input {
(*f_buf_b, *f_buf_a)
} else {
(*f_buf_a, *f_buf_b)
};
let nx = self.nx;
let ny = self.ny;
let nz = self.nz;
let wg_x = nx.div_ceil(8);
let wg_y = ny.div_ceil(8);
let wg_z = nz.div_ceil(8);
backend
.dispatch_wgsl(
LBM_BGK_D3Q19_WGSL,
"main",
&[
(
*params_buf,
wgpu::BufferBindingType::Storage { read_only: true },
),
(input, wgpu::BufferBindingType::Storage { read_only: true }),
(
output,
wgpu::BufferBindingType::Storage { read_only: false },
),
],
[wg_x, wg_y, wg_z],
)
.map_err(|e| crate::GpuError::ShaderDispatch(e.to_string()))?;
*current_b_is_input = !*current_b_is_input;
self.step_count += 1;
Ok(())
}
}
}
pub fn read_density(&self) -> Vec<f32> {
let nc = (self.nx * self.ny * self.nz) as usize;
match &self.inner {
LbmGpuSolverInner::Cpu { sim } => (0..nc)
.map(|cell| (0..19).map(|i| sim.f[i][cell] as f32).sum::<f32>())
.collect(),
#[cfg(feature = "wgpu-backend")]
LbmGpuSolverInner::Gpu {
backend,
f_buf_a,
f_buf_b,
current_b_is_input,
..
} => {
let read_buf = if *current_b_is_input {
*f_buf_b
} else {
*f_buf_a
};
let raw = backend.read_buffer_f64(read_buf);
let mut rho = vec![0.0_f32; nc];
for q in 0..19_usize {
for (cell, rho_c) in rho.iter_mut().enumerate() {
let raw_idx = q * nc + cell;
if raw_idx < raw.len() {
*rho_c += raw[raw_idx] as f32;
}
}
}
rho
}
}
}
pub fn read_velocity(&self) -> Vec<[f32; 3]> {
let nc = (self.nx * self.ny * self.nz) as usize;
match &self.inner {
LbmGpuSolverInner::Cpu { sim } => (0..nc)
.map(|cell| {
let rho: f64 = (0..19).map(|i| sim.f[i][cell]).sum();
if rho > 1e-10 {
let ux = (0..19)
.map(|i| sim.f[i][cell] * D3Q19_EX[i] as f64)
.sum::<f64>()
/ rho;
let uy = (0..19)
.map(|i| sim.f[i][cell] * D3Q19_EY[i] as f64)
.sum::<f64>()
/ rho;
let uz = (0..19)
.map(|i| sim.f[i][cell] * D3Q19_EZ[i] as f64)
.sum::<f64>()
/ rho;
[ux as f32, uy as f32, uz as f32]
} else {
[0.0; 3]
}
})
.collect(),
#[cfg(feature = "wgpu-backend")]
LbmGpuSolverInner::Gpu {
backend,
f_buf_a,
f_buf_b,
current_b_is_input,
..
} => {
let read_buf = if *current_b_is_input {
*f_buf_b
} else {
*f_buf_a
};
let raw = backend.read_buffer_f64(read_buf);
let mut vel = vec![[0.0_f32; 3]; nc];
for q in 0..19_usize {
for (cell, vel_c) in vel.iter_mut().enumerate() {
let raw_idx = q * nc + cell;
if raw_idx < raw.len() {
let fval = raw[raw_idx] as f32;
vel_c[0] += D3Q19_EX[q] as f32 * fval;
vel_c[1] += D3Q19_EY[q] as f32 * fval;
vel_c[2] += D3Q19_EZ[q] as f32 * fval;
}
}
}
let rho = self.read_density();
for cell in 0..nc {
let r = rho[cell];
if r > 1e-10 {
vel[cell][0] /= r;
vel[cell][1] /= r;
vel[cell][2] /= r;
}
}
vel
}
}
}
#[cfg(feature = "wgpu-backend")]
fn new_gpu(
mut backend: crate::compute::wgpu_backend::real::WgpuBackendReal,
nx: u32,
ny: u32,
nz: u32,
omega: f32,
) -> Self {
let nc = (nx * ny * nz) as usize;
let f_bytes = (19 * nc * 4) as u64;
let params_bytes: u64 = 20;
let params_buf = backend.create_buffer_storage(params_bytes);
let f_buf_a = backend.create_buffer_storage(f_bytes);
let f_buf_b = backend.create_buffer_storage(f_bytes);
let rho0 = 1.0_f64;
let f_init: Vec<f64> = (0..19)
.flat_map(|q| (0..nc).map(move |_| D3Q19_W[q] * rho0))
.collect();
backend.write_buffer_f64(f_buf_a, &f_init);
let omega_bits = omega.to_bits();
let params_data: [u32; 5] = [nx, ny, nz, omega_bits, 0];
let params_bytes_slice: &[u8] = bytemuck::cast_slice(¶ms_data);
backend.queue_write_buffer_raw(¶ms_buf, params_bytes_slice);
Self {
nx,
ny,
nz,
omega,
step_count: 0,
inner: LbmGpuSolverInner::Gpu {
backend,
params_buf,
f_buf_a,
f_buf_b,
current_b_is_input: false,
},
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_d3q19_weights_sum() {
let sum: f64 = D3Q19_W.iter().sum();
assert!(
(sum - 1.0).abs() < 1e-12,
"weights should sum to 1, got {}",
sum
);
}
#[test]
fn test_d3q19_opposite_index() {
for i in 0..19 {
let j = D3Q19_OPP[i];
assert_eq!(
D3Q19_EX[i], -D3Q19_EX[j],
"e_x[{}]={} should equal -e_x[{}]={}",
i, D3Q19_EX[i], j, D3Q19_EX[j]
);
assert_eq!(D3Q19_EY[i], -D3Q19_EY[j]);
assert_eq!(D3Q19_EZ[i], -D3Q19_EZ[j]);
}
}
#[test]
fn test_lbm_construction() {
let sim = LbmSimulation::new(LbmConfig {
nx: 4,
ny: 4,
nz: 4,
..LbmConfig::default()
});
assert_eq!(sim.config.n_cells(), 64);
assert_eq!(sim.f.len(), 19);
assert_eq!(sim.f[0].len(), 64);
}
#[test]
fn test_lbm_mass_conservation() {
let cfg = LbmConfig {
nx: 6,
ny: 6,
nz: 6,
tau: 0.6,
..LbmConfig::default()
};
let mut sim = LbmSimulation::new(cfg);
let mass_before: f64 = (0..19).map(|i| sim.f[i].iter().sum::<f64>()).sum();
for _ in 0..10 {
sim.step();
}
sim.mean_density(); let mass_after: f64 = (0..19).map(|i| sim.f[i].iter().sum::<f64>()).sum();
assert!(
(mass_before - mass_after).abs() / mass_before < 0.02,
"mass should be conserved: before={:.4} after={:.4}",
mass_before,
mass_after
);
}
#[test]
#[ignore = "pre-existing CPU bug: lid BC writes to solid cells that never stream — needs Zou-He moving-wall or relocation to y=ny-2"]
fn test_lbm_lid_driven_velocity() {
let cfg = LbmConfig {
nx: 6,
ny: 6,
nz: 6,
tau: 0.55,
..LbmConfig::default()
};
let mut sim = LbmSimulation::new(cfg);
sim.set_lid_velocity(0.05, 0.0, 0.0);
for _ in 0..50 {
sim.step_cpu();
}
let max_v = sim.max_velocity_magnitude();
assert!(max_v > 0.0, "lid-driven max_v should be > 0, got {}", max_v);
}
#[test]
fn test_lbm_viscosity() {
let cfg = LbmConfig {
tau: 0.6,
..LbmConfig::default()
};
let nu = cfg.viscosity();
assert!((nu - 1.0 / 30.0).abs() < 1e-10, "nu={}", nu);
}
#[test]
fn test_lbm_body_force_accelerates_flow() {
let cfg = LbmConfig {
nx: 4,
ny: 4,
nz: 4,
tau: 0.55,
force_x: 1e-5, ..LbmConfig::default()
};
let mut sim = LbmSimulation::new(cfg);
for _ in 0..100 {
sim.step_cpu();
}
let (ux, _, _) = sim.mean_velocity();
assert!(
ux > 0.0,
"body force in +X should produce ux > 0, got {}",
ux
);
}
#[test]
fn test_lbm_gpu_solver_construction() {
let solver = LbmGpuSolver::new_cpu(8, 8, 8, 1.5);
assert_eq!(solver.nx, 8);
assert_eq!(solver.ny, 8);
assert_eq!(solver.nz, 8);
assert!((solver.omega - 1.5).abs() < 1e-6);
}
#[test]
fn test_lbm_gpu_solver_density_init() {
let solver = LbmGpuSolver::new_cpu(4, 4, 4, 1.5);
let rho = solver.read_density();
assert_eq!(rho.len(), 64);
for &r in &rho {
assert!((r - 1.0).abs() < 1e-4, "rho={r}");
}
}
#[test]
fn test_lbm_gpu_solver_step_conserves_mass_cpu() {
let mut solver = LbmGpuSolver::new_cpu(8, 8, 8, 1.5);
let rho_before: f32 = solver.read_density().iter().sum();
for _ in 0..10 {
solver.step().expect("step failed");
}
let rho_after: f32 = solver.read_density().iter().sum();
let rel_err = (rho_before - rho_after).abs() / rho_before;
assert!(
rel_err < 0.01,
"mass not conserved: before={rho_before:.4} after={rho_after:.4} rel_err={rel_err:.6}"
);
}
#[test]
fn test_lbm_gpu_lid_driven_cavity() {
let nx = 16_u32;
let ny = 16_u32;
let nz = 16_u32;
let omega = 1.5_f32;
let mut solver = LbmGpuSolver::new(nx, ny, nz, omega);
if let LbmGpuSolverInner::Cpu { ref mut sim } = solver.inner {
sim.set_lid_velocity(0.1, 0.0, 0.0);
}
for _ in 0..100 {
solver.step().expect("LBM GPU step failed");
}
let rho = solver.read_density();
let total_rho: f32 = rho.iter().sum();
let n = (nx * ny * nz) as f32;
let expected = n; let rel_err = (total_rho - expected).abs() / expected;
assert!(
rel_err < 0.01,
"density not conserved: sum(rho)={total_rho:.4} expected={expected:.4} rel_err={rel_err:.6}"
);
}
}