pub const D2Q9_VELOCITIES: [[i32; 2]; 9] = [
[0, 0],
[1, 0],
[0, 1],
[-1, 0],
[0, -1],
[1, 1],
[-1, 1],
[-1, -1],
[1, -1],
];
pub const D2Q9_WEIGHTS: [f64; 9] = [
4.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 9.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
1.0 / 36.0,
];
const OPPOSITE: [usize; 9] = [0, 3, 4, 1, 2, 7, 8, 5, 6];
const CS2: f64 = 1.0 / 3.0;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoundaryCondition {
Periodic,
NoSlip,
FreeSlip,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuLbmDispatch {
Cpu,
Simulated,
}
#[derive(Debug, Clone)]
pub struct LbmConfig {
pub nx: usize,
pub ny: usize,
pub tau: f64,
pub boundary: BoundaryCondition,
pub dispatch: GpuLbmDispatch,
}
#[derive(Debug, Clone)]
pub struct LbmState {
pub f: Vec<f64>,
pub rho: Vec<f64>,
pub u: Vec<[f64; 2]>,
pub step: usize,
}
pub struct GpuLbm2D {
config: LbmConfig,
state: LbmState,
f_buf: Vec<f64>,
}
pub fn equilibrium_f(rho: f64, ux: f64, uy: f64) -> [f64; 9] {
let u2 = ux * ux + uy * uy;
let mut feq = [0.0_f64; 9];
for q in 0..9 {
let cx = D2Q9_VELOCITIES[q][0] as f64;
let cy = D2Q9_VELOCITIES[q][1] as f64;
let cu = cx * ux + cy * uy;
feq[q] = D2Q9_WEIGHTS[q]
* rho
* (1.0 + cu / CS2 + cu * cu / (2.0 * CS2 * CS2) - u2 / (2.0 * CS2));
}
feq
}
pub fn collide(f: &mut [f64], rho: &[f64], u: &[[f64; 2]], tau: f64, nx: usize, ny: usize) {
let omega = 1.0 / tau;
for y in 0..ny {
for x in 0..nx {
let cell = y * nx + x;
let r = rho[cell];
let [ux, uy] = u[cell];
let feq = equilibrium_f(r, ux, uy);
let base = cell * 9;
for q in 0..9 {
f[base + q] += omega * (feq[q] - f[base + q]);
}
}
}
}
pub fn stream(f: &mut [f64], nx: usize, ny: usize) {
let n = nx * ny * 9;
let mut f_new = vec![0.0_f64; n];
for y in 0..ny {
for x in 0..nx {
for q in 0..9 {
let cx = D2Q9_VELOCITIES[q][0];
let cy = D2Q9_VELOCITIES[q][1];
let xd = ((x as i64 + cx as i64).rem_euclid(nx as i64)) as usize;
let yd = ((y as i64 + cy as i64).rem_euclid(ny as i64)) as usize;
let src = (y * nx + x) * 9 + q;
let dst = (yd * nx + xd) * 9 + q;
f_new[dst] = f[src];
}
}
}
f.copy_from_slice(&f_new);
}
pub fn compute_macroscopic(f: &[f64], rho: &mut [f64], u: &mut [[f64; 2]], nx: usize, ny: usize) {
for y in 0..ny {
for x in 0..nx {
let cell = y * nx + x;
let base = cell * 9;
let mut r = 0.0_f64;
let mut mx = 0.0_f64;
let mut my = 0.0_f64;
for q in 0..9 {
let fq = f[base + q];
r += fq;
mx += D2Q9_VELOCITIES[q][0] as f64 * fq;
my += D2Q9_VELOCITIES[q][1] as f64 * fq;
}
rho[cell] = r;
if r > 1e-30 {
u[cell] = [mx / r, my / r];
} else {
u[cell] = [0.0, 0.0];
}
}
}
}
fn apply_noslip_walls(f: &mut [f64], nx: usize, ny: usize) {
let mut is_wall = vec![false; nx * ny];
for x in 0..nx {
is_wall[x] = true;
is_wall[(ny - 1) * nx + x] = true;
}
for y in 0..ny {
is_wall[y * nx] = true;
is_wall[y * nx + (nx - 1)] = true;
}
for cell in 0..nx * ny {
if !is_wall[cell] {
continue;
}
let base = cell * 9;
let mut tmp = [0.0_f64; 9];
for q in 0..9 {
tmp[OPPOSITE[q]] = f[base + q];
}
f[base..base + 9].copy_from_slice(&tmp);
}
}
fn apply_freeslip_walls(f: &mut [f64], nx: usize, ny: usize) {
for x in 0..nx {
{
let cell = x;
let b = cell * 9;
let f4 = f[b + 4];
let f7 = f[b + 7];
let f8 = f[b + 8];
f[b + 2] = f4;
f[b + 6] = f7;
f[b + 5] = f8;
f[b + 4] = 0.0;
f[b + 7] = 0.0;
f[b + 8] = 0.0;
}
{
let cell = (ny - 1) * nx + x;
let b = cell * 9;
let f2 = f[b + 2];
let f5 = f[b + 5];
let f6 = f[b + 6];
f[b + 4] = f2;
f[b + 8] = f5;
f[b + 7] = f6;
f[b + 2] = 0.0;
f[b + 5] = 0.0;
f[b + 6] = 0.0;
}
}
for y in 0..ny {
{
let cell = y * nx;
let b = cell * 9;
let f3 = f[b + 3];
let f6 = f[b + 6];
let f7 = f[b + 7];
f[b + 1] = f3;
f[b + 5] = f6;
f[b + 8] = f7;
f[b + 3] = 0.0;
f[b + 6] = 0.0;
f[b + 7] = 0.0;
}
{
let cell = y * nx + (nx - 1);
let b = cell * 9;
let f1 = f[b + 1];
let f5 = f[b + 5];
let f8 = f[b + 8];
f[b + 3] = f1;
f[b + 6] = f5;
f[b + 7] = f8;
f[b + 1] = 0.0;
f[b + 5] = 0.0;
f[b + 8] = 0.0;
}
}
}
impl GpuLbm2D {
pub fn new(config: LbmConfig) -> Self {
let nx = config.nx;
let ny = config.ny;
let n_cells = nx * ny;
let mut f = vec![0.0_f64; n_cells * 9];
let rho = vec![1.0_f64; n_cells];
let u = vec![[0.0_f64; 2]; n_cells];
for cell in 0..n_cells {
let feq = equilibrium_f(1.0, 0.0, 0.0);
let base = cell * 9;
f[base..base + 9].copy_from_slice(&feq);
}
let f_buf = f.clone();
let state = LbmState { f, rho, u, step: 0 };
Self {
config,
state,
f_buf,
}
}
pub fn poiseuille_init(config: LbmConfig, max_velocity: f64) -> Self {
let nx = config.nx;
let ny = config.ny;
let n_cells = nx * ny;
let mut f = vec![0.0_f64; n_cells * 9];
let mut rho = vec![1.0_f64; n_cells];
let mut u = vec![[0.0_f64; 2]; n_cells];
let ny_f = (ny.saturating_sub(1)).max(1) as f64;
for y in 0..ny {
let y_norm = y as f64 / ny_f;
let ux = max_velocity * 4.0 * y_norm * (1.0 - y_norm);
for x in 0..nx {
let cell = y * nx + x;
rho[cell] = 1.0;
u[cell] = [ux, 0.0];
let feq = equilibrium_f(1.0, ux, 0.0);
let base = cell * 9;
f[base..base + 9].copy_from_slice(&feq);
}
}
let f_buf = f.clone();
let state = LbmState { f, rho, u, step: 0 };
Self {
config,
state,
f_buf,
}
}
pub fn step(&mut self, n_steps: usize) {
match self.config.dispatch {
GpuLbmDispatch::Cpu => {
for _ in 0..n_steps {
self.step_cpu();
}
}
GpuLbmDispatch::Simulated => {
for _ in 0..n_steps {
self.step_cpu();
}
}
}
}
fn step_cpu(&mut self) {
let nx = self.config.nx;
let ny = self.config.ny;
let tau = self.config.tau;
compute_macroscopic(
&self.state.f,
&mut self.state.rho,
&mut self.state.u,
nx,
ny,
);
collide(
&mut self.state.f,
&self.state.rho,
&self.state.u,
tau,
nx,
ny,
);
std::mem::swap(&mut self.state.f, &mut self.f_buf);
self.state.f.copy_from_slice(&self.f_buf);
stream(&mut self.state.f, nx, ny);
match self.config.boundary {
BoundaryCondition::Periodic => {
}
BoundaryCondition::NoSlip => {
apply_noslip_walls(&mut self.state.f, nx, ny);
}
BoundaryCondition::FreeSlip => {
apply_freeslip_walls(&mut self.state.f, nx, ny);
}
}
self.state.step += 1;
}
pub fn density(&self) -> &[f64] {
&self.state.rho
}
pub fn velocity(&self) -> &[[f64; 2]] {
&self.state.u
}
pub fn kinetic_energy(&self) -> f64 {
self.state
.rho
.iter()
.zip(self.state.u.iter())
.map(|(&r, &[ux, uy])| 0.5 * r * (ux * ux + uy * uy))
.sum()
}
pub fn total_mass(&self) -> f64 {
self.state.rho.iter().sum()
}
pub fn conservation_check(&self, initial_mass: f64) -> (f64, f64) {
let current_mass = self.total_mass();
let mx: f64 = self
.state
.rho
.iter()
.zip(self.state.u.iter())
.map(|(&r, &[ux, _])| r * ux)
.sum();
let mass_dev = (current_mass - initial_mass).abs() / initial_mass.abs().max(f64::EPSILON);
let momentum_dev = mx.abs() / initial_mass.abs().max(f64::EPSILON);
(mass_dev, momentum_dev)
}
pub fn nx(&self) -> usize {
self.config.nx
}
pub fn ny(&self) -> usize {
self.config.ny
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_config(nx: usize, ny: usize, tau: f64) -> LbmConfig {
LbmConfig {
nx,
ny,
tau,
boundary: BoundaryCondition::Periodic,
dispatch: GpuLbmDispatch::Cpu,
}
}
#[test]
fn test_mass_conservation_periodic() {
let mut sim = GpuLbm2D::new(make_config(16, 16, 0.8));
let m0 = sim.total_mass();
sim.step(100);
let m1 = sim.total_mass();
assert!(
(m1 - m0).abs() < 1e-10,
"mass not conserved: Δm = {:.3e}",
m1 - m0
);
}
#[test]
fn test_poiseuille_profile() {
let config = make_config(4, 20, 0.8);
let max_v = 0.05;
let sim = GpuLbm2D::poiseuille_init(config, max_v);
let ny = sim.ny();
let nx = sim.nx();
let mid_y = ny / 2;
let mid_cell = mid_y * nx;
let ux_mid = sim.velocity()[mid_cell][0];
let ux_bottom = sim.velocity()[0][0];
let ux_top = sim.velocity()[(ny - 1) * nx][0];
assert!(
ux_mid > ux_bottom,
"mid velocity {ux_mid} should exceed bottom {ux_bottom}"
);
assert!(
ux_mid > ux_top,
"mid velocity {ux_mid} should exceed top {ux_top}"
);
assert!(
ux_mid > 0.0,
"midpoint velocity should be positive, got {ux_mid}"
);
}
#[test]
fn test_kinetic_energy_positive() {
let config = make_config(8, 8, 0.8);
let max_v = 0.05;
let sim = GpuLbm2D::poiseuille_init(config, max_v);
let ke = sim.kinetic_energy();
assert!(ke > 0.0, "kinetic energy should be positive, got {ke}");
}
#[test]
fn test_no_nan_after_step() {
let mut sim = GpuLbm2D::new(make_config(8, 8, 0.8));
sim.step(50);
for &r in sim.density() {
assert!(r.is_finite(), "density contains non-finite value: {r}");
}
for &[ux, uy] in sim.velocity() {
assert!(
ux.is_finite() && uy.is_finite(),
"velocity contains non-finite values: [{ux}, {uy}]"
);
}
}
#[test]
fn test_equilibrium_f_all_positive() {
let sim = GpuLbm2D::new(make_config(4, 4, 0.8));
let f = &sim.state.f;
for (idx, &fq) in f.iter().enumerate() {
assert!(
fq > 0.0,
"f[{idx}] = {fq} should be strictly positive at equilibrium"
);
}
}
#[test]
fn test_simulated_dispatch_same_as_cpu() {
let config_cpu = LbmConfig {
nx: 8,
ny: 8,
tau: 0.8,
boundary: BoundaryCondition::Periodic,
dispatch: GpuLbmDispatch::Cpu,
};
let config_sim = LbmConfig {
dispatch: GpuLbmDispatch::Simulated,
..config_cpu.clone()
};
let mut sim_cpu = GpuLbm2D::new(config_cpu);
let mut sim_gpu = GpuLbm2D::new(config_sim);
sim_cpu.step(20);
sim_gpu.step(20);
let mass_cpu = sim_cpu.total_mass();
let mass_gpu = sim_gpu.total_mass();
assert!(
(mass_cpu - mass_gpu).abs() < 1e-12,
"CPU and simulated GPU diverge: Δm = {:.3e}",
mass_cpu - mass_gpu
);
}
#[test]
fn test_noslip_mass_conservation() {
let config = LbmConfig {
nx: 10,
ny: 10,
tau: 0.8,
boundary: BoundaryCondition::NoSlip,
dispatch: GpuLbmDispatch::Cpu,
};
let mut sim = GpuLbm2D::new(config);
let m0 = sim.total_mass();
sim.step(50);
let m1 = sim.total_mass();
assert!(
(m1 - m0).abs() < 1e-8,
"NoSlip mass not conserved: Δm = {:.3e}",
m1 - m0
);
}
#[test]
fn test_conservation_check_equilibrium() {
let mut sim = GpuLbm2D::new(make_config(8, 8, 0.8));
let m0 = sim.total_mass();
sim.step(30);
let (dm, _dpx) = sim.conservation_check(m0);
assert!(dm < 1e-8, "mass deviation too large: {dm:.3e}");
}
#[test]
fn test_accessors() {
let sim = GpuLbm2D::new(make_config(12, 10, 0.7));
assert_eq!(sim.nx(), 12);
assert_eq!(sim.ny(), 10);
assert_eq!(sim.density().len(), 120);
assert_eq!(sim.velocity().len(), 120);
}
}