Skip to main content

oxiphysics_gpu/
gpu_sph_solver.rs

1#![allow(clippy::needless_range_loop)]
2// Copyright 2026 COOLJAPAN OU (Team KitaSan)
3// SPDX-License-Identifier: Apache-2.0
4
5//! GPU-side SPH solver — CPU mock backend.
6//!
7//! Implements a complete Smoothed Particle Hydrodynamics (SPH) solver pipeline
8//! using plain Rust loops as a CPU fallback. The API mirrors what would run on
9//! a GPU kernel dispatch, making it straightforward to swap in a real GPU
10//! backend without changing call-sites.
11
12#![allow(dead_code)]
13
14use std::f32::consts::PI;
15
16// ── Data structures ──────────────────────────────────────────────────────────
17
18/// A single SPH particle stored on the GPU buffer.
19#[derive(Debug, Clone, Copy, PartialEq)]
20pub struct GpuSphParticle {
21    /// Particle position \[x, y, z\] in metres.
22    pub pos: [f32; 3],
23    /// Particle velocity \[vx, vy, vz\] in m/s.
24    pub vel: [f32; 3],
25    /// Particle density in kg/m³.
26    pub density: f32,
27    /// Particle pressure in Pa.
28    pub pressure: f32,
29    /// Particle mass in kg.
30    pub mass: f32,
31}
32
33/// Simulation parameters for the GPU SPH solver.
34#[derive(Debug, Clone, Copy, PartialEq)]
35pub struct GpuSphParams {
36    /// SPH smoothing kernel radius h in metres.
37    pub kernel_radius: f32,
38    /// Tait EOS stiffness constant k.
39    pub eos_k: f32,
40    /// Tait EOS exponent gamma.
41    pub eos_gamma: f32,
42    /// Dynamic viscosity coefficient μ.
43    pub viscosity: f32,
44    /// Number of particles.
45    pub n_particles: usize,
46}
47
48/// GPU-side buffer holding all per-particle arrays.
49#[derive(Debug, Clone)]
50pub struct GpuSphBuffer {
51    /// Per-particle positions \[x, y, z\].
52    pub positions: Vec<[f32; 3]>,
53    /// Per-particle velocities \[vx, vy, vz\].
54    pub velocities: Vec<[f32; 3]>,
55    /// Per-particle densities (kg/m³).
56    pub densities: Vec<f32>,
57    /// Per-particle pressures (Pa).
58    pub pressures: Vec<f32>,
59}
60
61impl GpuSphBuffer {
62    /// Allocate a new buffer for `n` particles, all initialised to zero.
63    pub fn new(n: usize) -> Self {
64        Self {
65            positions: vec![[0.0; 3]; n],
66            velocities: vec![[0.0; 3]; n],
67            densities: vec![0.0; n],
68            pressures: vec![0.0; n],
69        }
70    }
71
72    /// Number of particles in this buffer.
73    pub fn len(&self) -> usize {
74        self.positions.len()
75    }
76
77    /// Returns `true` if the buffer is empty.
78    pub fn is_empty(&self) -> bool {
79        self.positions.is_empty()
80    }
81}
82
83// ── Kernel functions ─────────────────────────────────────────────────────────
84
85/// Wendland C2 SPH kernel W(r, h).
86///
87/// Returns the kernel value at distance `r` with smoothing length `h`.
88/// The kernel is zero for `r >= h`.
89pub fn sph_kernel_gpu(r: f32, h: f32) -> f32 {
90    if h <= 0.0 || r >= h {
91        return 0.0;
92    }
93    let q = r / h;
94    // 3-D Wendland C2: alpha_D * (1 - q/2)^4 * (2q + 1)
95    // alpha_D = 21 / (2 * pi * h^3)
96    let alpha = 21.0 / (2.0 * PI * h * h * h);
97    let t = 1.0 - 0.5 * q;
98    let t2 = t * t;
99    let t4 = t2 * t2;
100    alpha * t4 * (2.0 * q + 1.0)
101}
102
103/// Wendland C2 SPH kernel gradient magnitude dW/dr.
104///
105/// Returns the magnitude of the kernel gradient at distance `r`.
106pub fn sph_kernel_grad_gpu(r: f32, h: f32) -> f32 {
107    if h <= 0.0 || r >= h || r < 1e-12 {
108        return 0.0;
109    }
110    let q = r / h;
111    let alpha = 21.0 / (2.0 * PI * h * h * h);
112    let t = 1.0 - 0.5 * q;
113    let t2 = t * t;
114    let t3 = t2 * t;
115    // d/dr [ t^4 (2q+1) ] = [ -4*(1/2h)*t^3*(2q+1) + t^4*(2/h) ]
116    //                      = t^3 / h * [ -2*(2q+1) + 2*t ]
117    //                      = t^3 / h * [ -4q - 2 + 2 - q ] = t^3/h * (-5q)
118    // actually: d/dq [ t^4(2q+1) ] * (1/h)
119    // = [ 4*t^3*(-1/2)*(2q+1) + t^4*2 ] / h
120    // = t^3 [ -2(2q+1) + 2t ] / h
121    // = t^3 [ -4q-2 + 2 - q ] / h = t^3(-5q) / h
122    alpha * t3 * (-5.0 * q) / h
123}
124
125// ── Density computation ───────────────────────────────────────────────────────
126
127/// Compute per-particle densities via O(N²) neighbour sum.
128///
129/// `ρ_i = Σ_j m_j * W(|r_i - r_j|, h)`
130pub fn compute_density_gpu(buf: &GpuSphBuffer, params: &GpuSphParams) -> Vec<f32> {
131    let n = buf.positions.len();
132    let h = params.kernel_radius;
133    // Assume unit mass per particle (mass can be encoded separately)
134    let mass = 1.0_f32;
135    let mut densities = vec![0.0_f32; n];
136    for i in 0..n {
137        let pi = buf.positions[i];
138        let mut rho = 0.0_f32;
139        for j in 0..n {
140            let pj = buf.positions[j];
141            let dx = pi[0] - pj[0];
142            let dy = pi[1] - pj[1];
143            let dz = pi[2] - pj[2];
144            let r = (dx * dx + dy * dy + dz * dz).sqrt();
145            rho += mass * sph_kernel_gpu(r, h);
146        }
147        densities[i] = rho;
148    }
149    densities
150}
151
152// ── Pressure computation ──────────────────────────────────────────────────────
153
154/// Compute per-particle pressures using the Tait equation of state.
155///
156/// `P = k * (ρ/ρ₀)^γ - k` where `ρ₀ = 1000` kg/m³.
157pub fn compute_pressure_gpu(densities: &[f32], params: &GpuSphParams) -> Vec<f32> {
158    let rho0 = 1000.0_f32;
159    densities
160        .iter()
161        .map(|&rho| {
162            let ratio = rho / rho0;
163            params.eos_k * (ratio.powf(params.eos_gamma) - 1.0)
164        })
165        .collect()
166}
167
168// ── Force computation ─────────────────────────────────────────────────────────
169
170/// Compute pressure forces on all particles.
171///
172/// Uses the symmetric SPH momentum equation:
173/// `f_i = -Σ_j m_j (P_i/ρ_i² + P_j/ρ_j²) ∇W`
174#[allow(clippy::too_many_arguments)]
175pub fn compute_pressure_force_gpu(
176    buf: &GpuSphBuffer,
177    pressures: &[f32],
178    params: &GpuSphParams,
179) -> Vec<[f32; 3]> {
180    let n = buf.positions.len();
181    let h = params.kernel_radius;
182    let mass = 1.0_f32;
183    let mut forces = vec![[0.0_f32; 3]; n];
184
185    for i in 0..n {
186        let pi = buf.positions[i];
187        let rho_i = buf.densities[i].max(1e-6);
188        let p_i = pressures[i];
189        let mut fx = 0.0_f32;
190        let mut fy = 0.0_f32;
191        let mut fz = 0.0_f32;
192
193        for j in 0..n {
194            if i == j {
195                continue;
196            }
197            let pj = buf.positions[j];
198            let dx = pi[0] - pj[0];
199            let dy = pi[1] - pj[1];
200            let dz = pi[2] - pj[2];
201            let r = (dx * dx + dy * dy + dz * dz).sqrt();
202            if r < 1e-12 {
203                continue;
204            }
205            let rho_j = buf.densities[j].max(1e-6);
206            let p_j = pressures[j];
207            let grad_mag = sph_kernel_grad_gpu(r, h);
208            let coeff = -mass * (p_i / (rho_i * rho_i) + p_j / (rho_j * rho_j)) * grad_mag / r;
209            fx += coeff * dx;
210            fy += coeff * dy;
211            fz += coeff * dz;
212        }
213        forces[i] = [fx, fy, fz];
214    }
215    forces
216}
217
218/// Compute viscosity forces on all particles.
219///
220/// Uses the standard Monaghan viscosity formulation.
221pub fn compute_viscosity_force_gpu(buf: &GpuSphBuffer, params: &GpuSphParams) -> Vec<[f32; 3]> {
222    let n = buf.positions.len();
223    let h = params.kernel_radius;
224    let mu = params.viscosity;
225    let mass = 1.0_f32;
226    let mut forces = vec![[0.0_f32; 3]; n];
227
228    for i in 0..n {
229        let pi = buf.positions[i];
230        let vi = buf.velocities[i];
231        let rho_i = buf.densities[i].max(1e-6);
232        let mut fx = 0.0_f32;
233        let mut fy = 0.0_f32;
234        let mut fz = 0.0_f32;
235
236        for j in 0..n {
237            if i == j {
238                continue;
239            }
240            let pj = buf.positions[j];
241            let vj = buf.velocities[j];
242            let dx = pi[0] - pj[0];
243            let dy = pi[1] - pj[1];
244            let dz = pi[2] - pj[2];
245            let r = (dx * dx + dy * dy + dz * dz).sqrt();
246            if r < 1e-12 {
247                continue;
248            }
249            let rho_j = buf.densities[j].max(1e-6);
250            let lap = sph_kernel_grad_gpu(r, h); // use gradient magnitude as Laplacian approx
251            let dvx = vj[0] - vi[0];
252            let dvy = vj[1] - vi[1];
253            let dvz = vj[2] - vi[2];
254            let coeff = mu * mass / rho_j * lap / r;
255            fx += coeff * dvx;
256            fy += coeff * dvy;
257            fz += coeff * dvz;
258        }
259        forces[i] = [
260            forces[i][0] + fx / rho_i,
261            forces[i][1] + fy / rho_i,
262            forces[i][2] + fz / rho_i,
263        ];
264    }
265    forces
266}
267
268// ── Integration ───────────────────────────────────────────────────────────────
269
270/// Semi-implicit Euler integration step.
271///
272/// Updates velocities and positions in-place:
273/// `v += f * dt`, `x += v * dt`.
274pub fn integrate_sph_gpu(buf: &mut GpuSphBuffer, forces: &[[f32; 3]], dt: f32) {
275    let n = buf.positions.len();
276    for i in 0..n {
277        buf.velocities[i][0] += forces[i][0] * dt;
278        buf.velocities[i][1] += forces[i][1] * dt;
279        buf.velocities[i][2] += forces[i][2] * dt;
280        buf.positions[i][0] += buf.velocities[i][0] * dt;
281        buf.positions[i][1] += buf.velocities[i][1] * dt;
282        buf.positions[i][2] += buf.velocities[i][2] * dt;
283    }
284}
285
286// ── Full step ─────────────────────────────────────────────────────────────────
287
288/// Perform one complete SPH time-step.
289///
290/// Executes density → pressure → force → integrate in sequence.
291pub fn gpu_sph_step(buf: &mut GpuSphBuffer, params: &GpuSphParams, dt: f32) {
292    let densities = compute_density_gpu(buf, params);
293    let pressures = compute_pressure_gpu(&densities, params);
294    buf.densities = densities;
295    buf.pressures = pressures.clone();
296    let pf = compute_pressure_force_gpu(buf, &pressures, params);
297    let vf = compute_viscosity_force_gpu(buf, params);
298    let n = buf.positions.len();
299    let mut total_forces = vec![[0.0_f32; 3]; n];
300    for i in 0..n {
301        total_forces[i][0] = pf[i][0] + vf[i][0];
302        total_forces[i][1] = pf[i][1] + vf[i][1];
303        total_forces[i][2] = pf[i][2] + vf[i][2];
304    }
305    integrate_sph_gpu(buf, &total_forces, dt);
306}
307
308// ── Tests ─────────────────────────────────────────────────────────────────────
309
310#[cfg(test)]
311mod tests {
312    use super::*;
313
314    fn make_buf(n: usize) -> GpuSphBuffer {
315        let mut buf = GpuSphBuffer::new(n);
316        for i in 0..n {
317            buf.positions[i] = [i as f32 * 0.1, 0.0, 0.0];
318            buf.velocities[i] = [0.0; 3];
319            buf.densities[i] = 1000.0;
320            buf.pressures[i] = 0.0;
321        }
322        buf
323    }
324
325    fn default_params(n: usize) -> GpuSphParams {
326        GpuSphParams {
327            kernel_radius: 0.5,
328            eos_k: 1.0,
329            eos_gamma: 7.0,
330            viscosity: 0.01,
331            n_particles: n,
332        }
333    }
334
335    #[test]
336    fn test_gpu_sph_particle_fields() {
337        let p = GpuSphParticle {
338            pos: [1.0, 2.0, 3.0],
339            vel: [0.1, 0.2, 0.3],
340            density: 1000.0,
341            pressure: 101325.0,
342            mass: 0.001,
343        };
344        assert_eq!(p.pos[0], 1.0);
345        assert_eq!(p.mass, 0.001);
346    }
347
348    #[test]
349    fn test_gpu_sph_params_fields() {
350        let params = default_params(10);
351        assert_eq!(params.n_particles, 10);
352        assert!(params.kernel_radius > 0.0);
353    }
354
355    #[test]
356    fn test_gpu_sph_buffer_new() {
357        let buf = GpuSphBuffer::new(5);
358        assert_eq!(buf.len(), 5);
359        assert!(!buf.is_empty());
360    }
361
362    #[test]
363    fn test_gpu_sph_buffer_empty() {
364        let buf = GpuSphBuffer::new(0);
365        assert!(buf.is_empty());
366    }
367
368    #[test]
369    fn test_sph_kernel_gpu_zero_at_boundary() {
370        let w = sph_kernel_gpu(0.5, 0.5);
371        assert_eq!(w, 0.0);
372    }
373
374    #[test]
375    fn test_sph_kernel_gpu_zero_beyond() {
376        let w = sph_kernel_gpu(1.0, 0.5);
377        assert_eq!(w, 0.0);
378    }
379
380    #[test]
381    fn test_sph_kernel_gpu_positive_inside() {
382        let w = sph_kernel_gpu(0.1, 0.5);
383        assert!(w > 0.0);
384    }
385
386    #[test]
387    fn test_sph_kernel_gpu_peak_at_zero() {
388        let w0 = sph_kernel_gpu(0.0, 0.5);
389        let w1 = sph_kernel_gpu(0.2, 0.5);
390        assert!(w0 > w1);
391    }
392
393    #[test]
394    fn test_sph_kernel_gpu_zero_h() {
395        assert_eq!(sph_kernel_gpu(0.1, 0.0), 0.0);
396    }
397
398    #[test]
399    fn test_sph_kernel_grad_gpu_zero_h() {
400        assert_eq!(sph_kernel_grad_gpu(0.1, 0.0), 0.0);
401    }
402
403    #[test]
404    fn test_sph_kernel_grad_gpu_zero_r() {
405        assert_eq!(sph_kernel_grad_gpu(0.0, 0.5), 0.0);
406    }
407
408    #[test]
409    fn test_sph_kernel_grad_gpu_negative_inside() {
410        // Gradient magnitude is negative (kernel decreases with r)
411        let g = sph_kernel_grad_gpu(0.2, 0.5);
412        assert!(g < 0.0);
413    }
414
415    #[test]
416    fn test_sph_kernel_grad_gpu_zero_at_boundary() {
417        let g = sph_kernel_grad_gpu(0.5, 0.5);
418        assert_eq!(g, 0.0);
419    }
420
421    #[test]
422    fn test_compute_density_gpu_nonzero() {
423        let buf = make_buf(4);
424        let params = default_params(4);
425        let densities = compute_density_gpu(&buf, &params);
426        assert_eq!(densities.len(), 4);
427        // Particles within kernel_radius should contribute
428        assert!(densities.iter().any(|&d| d > 0.0));
429    }
430
431    #[test]
432    fn test_compute_density_gpu_length() {
433        let buf = make_buf(6);
434        let params = default_params(6);
435        let d = compute_density_gpu(&buf, &params);
436        assert_eq!(d.len(), 6);
437    }
438
439    #[test]
440    fn test_compute_density_gpu_empty() {
441        let buf = GpuSphBuffer::new(0);
442        let params = default_params(0);
443        let d = compute_density_gpu(&buf, &params);
444        assert!(d.is_empty());
445    }
446
447    #[test]
448    fn test_compute_pressure_gpu_positive() {
449        let params = default_params(3);
450        let densities = vec![1100.0_f32, 1000.0_f32, 900.0_f32];
451        let pressures = compute_pressure_gpu(&densities, &params);
452        assert_eq!(pressures.len(), 3);
453        // Higher density => higher pressure
454        assert!(pressures[0] > pressures[1]);
455    }
456
457    #[test]
458    fn test_compute_pressure_gpu_rest_density() {
459        let params = default_params(1);
460        let d = vec![1000.0_f32]; // exactly rest density
461        let p = compute_pressure_gpu(&d, &params);
462        // (1.0)^gamma - 1 = 0 => P = 0
463        assert!(p[0].abs() < 1e-3);
464    }
465
466    #[test]
467    fn test_compute_pressure_force_gpu_length() {
468        let buf = make_buf(4);
469        let params = default_params(4);
470        let pressures = vec![100.0_f32; 4];
471        let forces = compute_pressure_force_gpu(&buf, &pressures, &params);
472        assert_eq!(forces.len(), 4);
473    }
474
475    #[test]
476    fn test_compute_viscosity_force_gpu_length() {
477        let buf = make_buf(4);
478        let params = default_params(4);
479        let forces = compute_viscosity_force_gpu(&buf, &params);
480        assert_eq!(forces.len(), 4);
481    }
482
483    #[test]
484    fn test_integrate_sph_gpu_position_update() {
485        let mut buf = GpuSphBuffer::new(2);
486        buf.velocities[0] = [1.0, 0.0, 0.0];
487        buf.velocities[1] = [0.0, 1.0, 0.0];
488        let forces = vec![[0.0_f32; 3]; 2];
489        integrate_sph_gpu(&mut buf, &forces, 0.1);
490        assert!((buf.positions[0][0] - 0.1).abs() < 1e-5);
491        assert!((buf.positions[1][1] - 0.1).abs() < 1e-5);
492    }
493
494    #[test]
495    fn test_integrate_sph_gpu_velocity_update() {
496        let mut buf = GpuSphBuffer::new(1);
497        let forces = vec![[2.0_f32, 0.0, 0.0]];
498        integrate_sph_gpu(&mut buf, &forces, 0.5);
499        assert!((buf.velocities[0][0] - 1.0).abs() < 1e-5);
500    }
501
502    #[test]
503    fn test_gpu_sph_step_runs() {
504        let mut buf = make_buf(5);
505        let params = default_params(5);
506        gpu_sph_step(&mut buf, &params, 0.001);
507        // After one step densities should be set
508        assert!(buf.densities.iter().any(|&d| d >= 0.0));
509    }
510
511    #[test]
512    fn test_gpu_sph_step_no_nan() {
513        let mut buf = make_buf(5);
514        let params = default_params(5);
515        gpu_sph_step(&mut buf, &params, 0.001);
516        for i in 0..5 {
517            assert!(!buf.positions[i][0].is_nan());
518            assert!(!buf.densities[i].is_nan());
519        }
520    }
521
522    #[test]
523    fn test_gpu_sph_step_pressure_set() {
524        let mut buf = make_buf(3);
525        let params = default_params(3);
526        gpu_sph_step(&mut buf, &params, 0.001);
527        assert_eq!(buf.pressures.len(), 3);
528    }
529
530    #[test]
531    fn test_sph_kernel_symmetry() {
532        let h = 0.5;
533        let w1 = sph_kernel_gpu(0.1, h);
534        let w2 = sph_kernel_gpu(0.1, h);
535        assert!((w1 - w2).abs() < 1e-8);
536    }
537
538    #[test]
539    fn test_sph_kernel_decreasing() {
540        let h = 1.0;
541        let mut prev = sph_kernel_gpu(0.0, h);
542        for i in 1..10 {
543            let r = i as f32 * 0.09;
544            let w = sph_kernel_gpu(r, h);
545            assert!(w <= prev + 1e-6);
546            prev = w;
547        }
548    }
549
550    #[test]
551    fn test_compute_density_uniform_grid() {
552        // All particles at same location -> all have same density
553        let n = 4;
554        let buf = GpuSphBuffer::new(n);
555        // All at origin
556        let params = default_params(n);
557        let d = compute_density_gpu(&buf, &params);
558        // All densities should be equal
559        for i in 1..n {
560            assert!((d[i] - d[0]).abs() < 1e-5);
561        }
562        let _ = buf.positions[0]; // suppress warning
563    }
564
565    #[test]
566    fn test_pressure_force_zero_pressure() {
567        let buf = make_buf(3);
568        let params = default_params(3);
569        let pressures = vec![0.0_f32; 3];
570        let forces = compute_pressure_force_gpu(&buf, &pressures, &params);
571        for f in &forces {
572            assert!(f[0].abs() < 1e-6 && f[1].abs() < 1e-6 && f[2].abs() < 1e-6);
573        }
574    }
575
576    #[test]
577    fn test_viscosity_force_same_velocity() {
578        // Particles with same velocity should have zero viscosity force
579        let mut buf = make_buf(3);
580        for i in 0..3 {
581            buf.velocities[i] = [1.0, 0.5, 0.0];
582        }
583        let params = default_params(3);
584        let forces = compute_viscosity_force_gpu(&buf, &params);
585        for f in &forces {
586            assert!(f[0].abs() < 1e-4 && f[1].abs() < 1e-4 && f[2].abs() < 1e-4);
587        }
588    }
589
590    #[test]
591    fn test_buf_clone() {
592        let buf = make_buf(3);
593        let buf2 = buf.clone();
594        assert_eq!(buf2.len(), 3);
595    }
596
597    #[test]
598    fn test_params_copy() {
599        let p = default_params(5);
600        let p2 = p;
601        assert_eq!(p2.n_particles, 5);
602    }
603
604    #[test]
605    fn test_integrate_multiple_steps() {
606        let mut buf = GpuSphBuffer::new(1);
607        buf.velocities[0] = [1.0, 0.0, 0.0];
608        let forces = vec![[0.0_f32; 3]];
609        for _ in 0..10 {
610            integrate_sph_gpu(&mut buf, &forces, 0.1);
611        }
612        assert!((buf.positions[0][0] - 1.0).abs() < 1e-4);
613    }
614}