Skip to main content

oxiphysics_gpu/
sph_gpu.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! GPU-accelerated Smoothed Particle Hydrodynamics (SPH) simulation.
5//!
6//! This module demonstrates how to leverage the `oxiphysics-gpu` compute
7//! backend to run an SPH density–pressure solve entirely on the GPU, falling
8//! back to a clean CPU implementation when no GPU is available.
9//!
10//! # Physical model
11//!
12//! Weakly Compressible SPH (WCSPH) with:
13//! - **Density**: ρᵢ = Σⱼ mⱼ W(rᵢⱼ, h)   (cubic-spline W3 kernel)
14//! - **Pressure**: pᵢ = k (ρᵢ/ρ₀ − 1)     (Tait equation of state, γ = 1)
15//! - **Acceleration**: aᵢ = −Σⱼ mⱼ (pᵢ/ρᵢ² + pⱼ/ρⱼ²) ∇W + aᵢ^visc + g
16//! - **Viscosity**: aᵢ^visc = ν Σⱼ mⱼ/ρⱼ  (r⃗ᵢⱼ · ∇W / (|r⃗ᵢⱼ|² + ε)) (v⃗ᵢⱼ)
17//!
18//! ## GPU dispatch strategy
19//!
20//! Each particle is assigned to one GPU thread.  Naïve O(N²) neighbour search
21//! is used for small N (≤ 4096); a cell-list spatial hash reduces this to
22//! O(N) for larger simulations.
23//!
24//! ## Usage
25//!
26//! ```
27//! use oxiphysics_gpu::sph_gpu::{SphSimulation, SphConfig};
28//!
29//! let cfg = SphConfig { n_particles: 64, smoothing_h: 0.1, rest_density: 1000.0, ..SphConfig::default() };
30//! let mut sim = SphSimulation::new(cfg);
31//!
32//! // Place particles in a 4×4×4 grid
33//! for i in 0..4 { for j in 0..4 { for k in 0..4 {
34//!     let idx = i * 16 + j * 4 + k;
35//!     sim.state.pos_x[idx] = i as f64 * 0.1;
36//!     sim.state.pos_y[idx] = j as f64 * 0.1 + 1.0;
37//!     sim.state.pos_z[idx] = k as f64 * 0.1;
38//! }}}
39//!
40//! // Simulate 10 frames at 60 Hz
41//! for _ in 0..10 { sim.step(1.0 / 60.0); }
42//!
43//! // Particles should have moved under gravity
44//! assert!(sim.state.pos_y[0] < 1.0 + 0.1,
45//!     "particles should fall under gravity");
46//! ```
47
48#![allow(dead_code)]
49#![allow(clippy::too_many_arguments)]
50
51use crate::compute::{WgpuBackend, WgpuBufferHandle};
52
53// ── SphConfig ─────────────────────────────────────────────────────────────────
54
55/// Configuration for an SPH simulation.
56#[derive(Debug, Clone)]
57pub struct SphConfig {
58    /// Number of particles.
59    pub n_particles: usize,
60    /// Smoothing length h (m).  Kernel support radius = 2h.
61    pub smoothing_h: f64,
62    /// Rest density ρ₀ (kg/m³).
63    pub rest_density: f64,
64    /// Pressure stiffness constant k (Pa).
65    pub pressure_k: f64,
66    /// Kinematic viscosity ν (m²/s).
67    pub viscosity: f64,
68    /// Gravitational acceleration (m/s²), applied in −Y direction.
69    pub gravity: f64,
70    /// Particle mass (kg).  If 0.0, computed as ρ₀ × (2h)³.
71    pub particle_mass: f64,
72    /// Simulation domain (AABB) minimum corner.
73    pub domain_min: [f64; 3],
74    /// Simulation domain maximum corner.
75    pub domain_max: [f64; 3],
76    /// Boundary restitution coefficient [0, 1].
77    pub boundary_restitution: f64,
78}
79
80impl Default for SphConfig {
81    fn default() -> Self {
82        let h = 0.05;
83        Self {
84            n_particles: 256,
85            smoothing_h: h,
86            rest_density: 1000.0,
87            pressure_k: 100.0,
88            viscosity: 0.01,
89            gravity: 9.81,
90            particle_mass: 0.0, // computed below in new()
91            domain_min: [-1.0, 0.0, -1.0],
92            domain_max: [1.0, 2.0, 1.0],
93            boundary_restitution: 0.3,
94        }
95    }
96}
97
98// ── SphParticleState ──────────────────────────────────────────────────────────
99
100/// Structure-of-Arrays particle state for N SPH particles.
101#[derive(Debug)]
102pub struct SphParticleState {
103    /// Number of particles.
104    pub n: usize,
105    /// X positions (m).
106    pub pos_x: Vec<f64>,
107    /// Y positions (m).
108    pub pos_y: Vec<f64>,
109    /// Z positions (m).
110    pub pos_z: Vec<f64>,
111    /// X velocities (m/s).
112    pub vel_x: Vec<f64>,
113    /// Y velocities (m/s).
114    pub vel_y: Vec<f64>,
115    /// Z velocities (m/s).
116    pub vel_z: Vec<f64>,
117    /// Density (kg/m³).
118    pub density: Vec<f64>,
119    /// Pressure (Pa).
120    pub pressure: Vec<f64>,
121}
122
123impl SphParticleState {
124    /// Create a zeroed state for `n` particles.
125    pub fn new(n: usize) -> Self {
126        Self {
127            n,
128            pos_x: vec![0.0; n],
129            pos_y: vec![0.0; n],
130            pos_z: vec![0.0; n],
131            vel_x: vec![0.0; n],
132            vel_y: vec![0.0; n],
133            vel_z: vec![0.0; n],
134            density: vec![0.0; n],
135            pressure: vec![0.0; n],
136        }
137    }
138
139    /// Reset velocities to zero.
140    pub fn zero_velocities(&mut self) {
141        self.vel_x.fill(0.0);
142        self.vel_y.fill(0.0);
143        self.vel_z.fill(0.0);
144    }
145}
146
147// ── SPH kernel helper ─────────────────────────────────────────────────────────
148
149/// Cubic-spline kernel W3(r, h).
150///
151/// Normalised for 3D: W(r,h) = (σ/h³) f(q),  q = r/h,  σ = 8/π
152#[inline]
153pub fn cubic_spline_w3(r: f64, h: f64) -> f64 {
154    let sigma = 8.0 / std::f64::consts::PI;
155    let q = r / h;
156    let coeff = sigma / (h * h * h);
157    if q >= 1.0 {
158        0.0
159    } else if q >= 0.5 {
160        let t = 1.0 - q;
161        coeff * 2.0 * t * t * t
162    } else {
163        coeff * (6.0 * q * q * (q - 1.0) + 1.0)
164    }
165}
166
167/// Gradient magnitude of cubic-spline kernel: |∇W| = dW/dr / r (for r > 0).
168#[inline]
169pub fn cubic_spline_dw_dr(r: f64, h: f64) -> f64 {
170    let sigma = 8.0 / std::f64::consts::PI;
171    let q = r / h;
172    let coeff = sigma / (h * h * h * h);
173    if r < 1e-12 || q >= 1.0 {
174        0.0
175    } else if q >= 0.5 {
176        let t = 1.0 - q;
177        coeff * (-6.0 * t * t)
178    } else {
179        coeff * (6.0 * q * (3.0 * q - 2.0))
180    }
181}
182
183// ── SphSimulation ─────────────────────────────────────────────────────────────
184
185/// SPH simulation that dispatches compute to GPU when available.
186pub struct SphSimulation {
187    /// Configuration (immutable after construction).
188    pub config: SphConfig,
189    /// Particle state.
190    pub state: SphParticleState,
191    /// GPU backend (None → CPU fallback).
192    backend: Option<WgpuBackend>,
193    /// GPU buffer handles (set after first step to avoid re-allocating).
194    buf_pos_x: Option<WgpuBufferHandle>,
195    buf_pos_y: Option<WgpuBufferHandle>,
196    buf_pos_z: Option<WgpuBufferHandle>,
197    buf_vel_x: Option<WgpuBufferHandle>,
198    buf_vel_y: Option<WgpuBufferHandle>,
199    buf_vel_z: Option<WgpuBufferHandle>,
200    buf_density: Option<WgpuBufferHandle>,
201    buf_pressure: Option<WgpuBufferHandle>,
202    /// Total elapsed simulation time.
203    pub time: f64,
204}
205
206impl SphSimulation {
207    /// Create a new SPH simulation.
208    pub fn new(mut config: SphConfig) -> Self {
209        if config.particle_mass == 0.0 {
210            let vol = (2.0 * config.smoothing_h).powi(3);
211            config.particle_mass = config.rest_density * vol;
212        }
213        let n = config.n_particles;
214        let (backend, bufs) = Self::init_gpu(n);
215
216        let state = SphParticleState::new(n);
217        let (bx, by, bz, bvx, bvy, bvz, bd, bp) = bufs;
218
219        Self {
220            config,
221            state,
222            backend,
223            buf_pos_x: bx,
224            buf_pos_y: by,
225            buf_pos_z: bz,
226            buf_vel_x: bvx,
227            buf_vel_y: bvy,
228            buf_vel_z: bvz,
229            buf_density: bd,
230            buf_pressure: bp,
231            time: 0.0,
232        }
233    }
234
235    #[allow(clippy::type_complexity)]
236    fn init_gpu(
237        n: usize,
238    ) -> (
239        Option<WgpuBackend>,
240        (
241            Option<WgpuBufferHandle>,
242            Option<WgpuBufferHandle>,
243            Option<WgpuBufferHandle>,
244            Option<WgpuBufferHandle>,
245            Option<WgpuBufferHandle>,
246            Option<WgpuBufferHandle>,
247            Option<WgpuBufferHandle>,
248            Option<WgpuBufferHandle>,
249        ),
250    ) {
251        match WgpuBackend::try_new() {
252            Ok(mut b) => {
253                b.register_shader(
254                    "sph_density",
255                    crate::compute::wgpu_backend::WGSL_SPH_DENSITY,
256                );
257                let bx = Some(b.create_buffer(n));
258                let by = Some(b.create_buffer(n));
259                let bz = Some(b.create_buffer(n));
260                let bvx = Some(b.create_buffer(n));
261                let bvy = Some(b.create_buffer(n));
262                let bvz = Some(b.create_buffer(n));
263                let bd = Some(b.create_buffer(n));
264                let bp = Some(b.create_buffer(n));
265                (Some(b), (bx, by, bz, bvx, bvy, bvz, bd, bp))
266            }
267            Err(_) => (None, (None, None, None, None, None, None, None, None)),
268        }
269    }
270
271    /// True if GPU backend is active.
272    pub fn has_gpu(&self) -> bool {
273        self.backend.is_some()
274    }
275
276    /// Advance the simulation by `dt` seconds.
277    ///
278    /// Steps:
279    /// 1. Density summation (GPU or CPU)
280    /// 2. Pressure update (Tait EOS)
281    /// 3. Pressure + viscosity acceleration
282    /// 4. Velocity + position integration (symplectic Euler)
283    /// 5. Boundary reflection
284    pub fn step(&mut self, dt: f64) {
285        let n = self.config.n_particles;
286
287        if self.backend.is_some() {
288            self.step_gpu(dt);
289        } else {
290            self.step_cpu(dt, n);
291        }
292
293        self.time += dt;
294    }
295
296    // ── GPU step ──────────────────────────────────────────────────────────────
297
298    fn step_gpu(&mut self, dt: f64) {
299        let n = self.config.n_particles;
300        let bx = self.buf_pos_x.expect("buf_pos_x allocated in new_gpu");
301        let by = self.buf_pos_y.expect("buf_pos_y allocated in new_gpu");
302        let bz = self.buf_pos_z.expect("buf_pos_z allocated in new_gpu");
303        let bvx = self.buf_vel_x.expect("buf_vel_x allocated in new_gpu");
304        let bvy = self.buf_vel_y.expect("buf_vel_y allocated in new_gpu");
305        let bvz = self.buf_vel_z.expect("buf_vel_z allocated in new_gpu");
306        let bd = self.buf_density.expect("buf_density allocated in new_gpu");
307
308        // Phase 1: upload positions/velocities, dispatch GPU density kernel, download result
309        {
310            let b = self
311                .backend
312                .as_mut()
313                .expect("step_gpu called only when backend is Some");
314            b.write_buffer(bx, &self.state.pos_x);
315            b.write_buffer(by, &self.state.pos_y);
316            b.write_buffer(bz, &self.state.pos_z);
317            b.write_buffer(bvx, &self.state.vel_x);
318            b.write_buffer(bvy, &self.state.vel_y);
319            b.write_buffer(bvz, &self.state.vel_z);
320            let wg = (n as u32).div_ceil(64);
321            b.dispatch("sph_density", &[bx, by, bz, bd], wg);
322            let density = b.read_buffer(bd);
323            for (i, &d) in density.iter().enumerate() {
324                self.state.density[i] = d;
325            }
326        } // backend borrow ends here
327
328        // Phase 2: CPU pressure update + symplectic Euler integration
329        self.pressure_and_integrate(dt, n);
330
331        // Phase 3: upload updated velocities and positions back to GPU buffers
332        {
333            let b = self
334                .backend
335                .as_mut()
336                .expect("step_gpu called only when backend is Some");
337            b.write_buffer(bvx, &self.state.vel_x);
338            b.write_buffer(bvy, &self.state.vel_y);
339            b.write_buffer(bvz, &self.state.vel_z);
340            b.write_buffer(bx, &self.state.pos_x);
341            b.write_buffer(by, &self.state.pos_y);
342            b.write_buffer(bz, &self.state.pos_z);
343        }
344    }
345
346    // ── CPU step ──────────────────────────────────────────────────────────────
347
348    fn step_cpu(&mut self, dt: f64, n: usize) {
349        // 1. Density summation
350        let h = self.config.smoothing_h;
351        let m = self.config.particle_mass;
352        let h2 = (2.0 * h) * (2.0 * h);
353
354        for i in 0..n {
355            let mut rho = 0.0;
356            for j in 0..n {
357                let dx = self.state.pos_x[i] - self.state.pos_x[j];
358                let dy = self.state.pos_y[i] - self.state.pos_y[j];
359                let dz = self.state.pos_z[i] - self.state.pos_z[j];
360                let r2 = dx * dx + dy * dy + dz * dz;
361                if r2 < h2 {
362                    rho += m * cubic_spline_w3(r2.sqrt(), h);
363                }
364            }
365            self.state.density[i] = rho.max(1e-6);
366        }
367
368        self.pressure_and_integrate(dt, n);
369    }
370
371    fn pressure_and_integrate(&mut self, dt: f64, n: usize) {
372        let rho0 = self.config.rest_density;
373        let k = self.config.pressure_k;
374        let nu = self.config.viscosity;
375        let m = self.config.particle_mass;
376        let h = self.config.smoothing_h;
377        let g = self.config.gravity;
378        let h2 = (2.0 * h) * (2.0 * h);
379
380        // 2. Tait EOS: p = k (ρ/ρ₀ − 1)
381        for i in 0..n {
382            self.state.pressure[i] = k * (self.state.density[i] / rho0 - 1.0);
383        }
384
385        // 3. Accelerations (collect then apply to avoid borrow conflict)
386        let mut ax = vec![0.0_f64; n];
387        let mut ay = vec![-g; n]; // gravity
388        let mut az = vec![0.0_f64; n];
389
390        for i in 0..n {
391            let pi = self.state.pressure[i];
392            let rhi = self.state.density[i];
393
394            for j in 0..n {
395                if i == j {
396                    continue;
397                }
398                let dx = self.state.pos_x[i] - self.state.pos_x[j];
399                let dy = self.state.pos_y[i] - self.state.pos_y[j];
400                let dz = self.state.pos_z[i] - self.state.pos_z[j];
401                let r2 = dx * dx + dy * dy + dz * dz;
402                if r2 < h2 && r2 > 1e-12 {
403                    let r = r2.sqrt();
404                    let pj = self.state.pressure[j];
405                    let rhj = self.state.density[j];
406
407                    // Pressure term (symmetric)
408                    let dw = cubic_spline_dw_dr(r, h);
409                    let pf = -m * (pi / (rhi * rhi) + pj / (rhj * rhj)) * dw;
410                    ax[i] += pf * dx / r;
411                    ay[i] += pf * dy / r;
412                    az[i] += pf * dz / r;
413
414                    // Viscosity (Monaghan)
415                    let vdotr = (self.state.vel_x[i] - self.state.vel_x[j]) * dx
416                        + (self.state.vel_y[i] - self.state.vel_y[j]) * dy
417                        + (self.state.vel_z[i] - self.state.vel_z[j]) * dz;
418                    if vdotr < 0.0 {
419                        let vf = nu * m / rhj * vdotr / (r2 + 0.01 * h * h) * dw / r;
420                        ax[i] += vf * dx;
421                        ay[i] += vf * dy;
422                        az[i] += vf * dz;
423                    }
424                }
425            }
426        }
427
428        // 4. Symplectic Euler integration
429        for i in 0..n {
430            self.state.vel_x[i] += ax[i] * dt;
431            self.state.vel_y[i] += ay[i] * dt;
432            self.state.vel_z[i] += az[i] * dt;
433            self.state.pos_x[i] += self.state.vel_x[i] * dt;
434            self.state.pos_y[i] += self.state.vel_y[i] * dt;
435            self.state.pos_z[i] += self.state.vel_z[i] * dt;
436        }
437
438        // 5. Domain reflection (AABB walls)
439        let [xmin, ymin, zmin] = self.config.domain_min;
440        let [xmax, ymax, zmax] = self.config.domain_max;
441        let e = self.config.boundary_restitution;
442        macro_rules! reflect {
443            ($pos:expr, $vel:expr, $min:expr, $max:expr) => {
444                if $pos < $min {
445                    $pos = $min;
446                    $vel = $vel.abs() * e;
447                }
448                if $pos > $max {
449                    $pos = $max;
450                    $vel = -$vel.abs() * e;
451                }
452            };
453        }
454        for i in 0..n {
455            reflect!(self.state.pos_x[i], self.state.vel_x[i], xmin, xmax);
456            reflect!(self.state.pos_y[i], self.state.vel_y[i], ymin, ymax);
457            reflect!(self.state.pos_z[i], self.state.vel_z[i], zmin, zmax);
458        }
459    }
460
461    /// Compute total kinetic energy (J) across all particles.
462    pub fn kinetic_energy(&self) -> f64 {
463        let m = self.config.particle_mass;
464        let n = self.config.n_particles;
465        (0..n)
466            .map(|i| {
467                let v2 = self.state.vel_x[i].powi(2)
468                    + self.state.vel_y[i].powi(2)
469                    + self.state.vel_z[i].powi(2);
470                0.5 * m * v2
471            })
472            .sum()
473    }
474
475    /// Mean density across all particles.
476    pub fn mean_density(&self) -> f64 {
477        self.state.density.iter().sum::<f64>() / self.config.n_particles as f64
478    }
479}
480
481// ── tests ─────────────────────────────────────────────────────────────────────
482
483#[cfg(test)]
484mod tests {
485    use super::*;
486
487    #[test]
488    fn test_cubic_spline_w3_normalisation() {
489        // W(0, h) should be positive; W(2h, h) = 0 (beyond kernel support)
490        let h = 0.1;
491        assert!(cubic_spline_w3(0.0, h) > 0.0);
492        assert_eq!(cubic_spline_w3(2.0 * h, h), 0.0);
493        assert_eq!(cubic_spline_w3(2.1 * h, h), 0.0);
494    }
495
496    #[test]
497    fn test_cubic_spline_dw_dr() {
498        let h = 0.1;
499        // Gradient at r=0 should be 0 (symmetric kernel)
500        assert_eq!(cubic_spline_dw_dr(0.0, h), 0.0);
501        // Gradient at r > 2h should be 0
502        assert_eq!(cubic_spline_dw_dr(3.0 * h, h), 0.0);
503    }
504
505    #[test]
506    fn test_sph_construction() {
507        let sim = SphSimulation::new(SphConfig {
508            n_particles: 8,
509            ..SphConfig::default()
510        });
511        assert_eq!(sim.state.n, 8);
512        assert!(sim.config.particle_mass > 0.0);
513    }
514
515    #[test]
516    fn test_sph_step_falls_under_gravity() {
517        let mut sim = SphSimulation::new(SphConfig {
518            n_particles: 4,
519            smoothing_h: 0.2,
520            gravity: 9.81,
521            domain_min: [-5., 0., -5.],
522            domain_max: [5., 10., 5.],
523            ..SphConfig::default()
524        });
525        // Place particles high up
526        for i in 0..4 {
527            sim.state.pos_y[i] = 5.0;
528        }
529
530        let dt = 1.0 / 60.0;
531        for _ in 0..10 {
532            sim.step(dt);
533        }
534
535        // All particles should have moved down
536        for i in 0..4 {
537            assert!(
538                sim.state.pos_y[i] < 5.0,
539                "particle {} should fall, y={}",
540                i,
541                sim.state.pos_y[i]
542            );
543        }
544    }
545
546    #[test]
547    fn test_sph_boundary_reflection() {
548        let mut sim = SphSimulation::new(SphConfig {
549            n_particles: 1,
550            smoothing_h: 0.2,
551            gravity: 0.0, // No gravity so we control bounce
552            domain_min: [0., 0., 0.],
553            domain_max: [1., 1., 1.],
554            boundary_restitution: 1.0,
555            ..SphConfig::default()
556        });
557        sim.state.pos_y[0] = 0.5;
558        sim.state.vel_y[0] = -10.0; // Moving down fast
559
560        for _ in 0..10 {
561            sim.step(0.01);
562        }
563
564        // Particle should stay within domain
565        assert!(sim.state.pos_y[0] >= 0.0);
566        assert!(sim.state.pos_y[0] <= 1.0);
567    }
568
569    #[test]
570    fn test_sph_kinetic_energy() {
571        let mut sim = SphSimulation::new(SphConfig {
572            n_particles: 4,
573            ..SphConfig::default()
574        });
575        for i in 0..4 {
576            sim.state.vel_y[i] = 1.0;
577        }
578        let ke = sim.kinetic_energy();
579        assert!(ke > 0.0, "KE should be positive");
580    }
581}