1#![allow(clippy::needless_range_loop)]
2#![allow(dead_code)]
13#![allow(clippy::too_many_arguments)]
14
15#[derive(Debug, Clone)]
22pub struct FluidGpuBuffer {
23 pub front: Vec<f64>,
25 pub back: Vec<f64>,
27 pub width: usize,
29 pub height: usize,
31 pub depth: usize,
33 pub components: usize,
35}
36
37impl FluidGpuBuffer {
38 pub fn new(width: usize, height: usize, depth: usize, components: usize) -> Self {
41 let n = width * height * depth * components;
42 Self {
43 front: vec![0.0; n],
44 back: vec![0.0; n],
45 width,
46 height,
47 depth,
48 components,
49 }
50 }
51
52 #[inline]
54 pub fn len(&self) -> usize {
55 self.front.len()
56 }
57
58 #[inline]
60 pub fn is_empty(&self) -> bool {
61 self.front.is_empty()
62 }
63
64 pub fn swap(&mut self) {
66 std::mem::swap(&mut self.front, &mut self.back);
67 }
68
69 #[inline]
71 pub fn idx(&self, x: usize, y: usize, z: usize, c: usize) -> usize {
72 ((z * self.height + y) * self.width + x) * self.components + c
73 }
74
75 #[inline]
77 pub fn read(&self, x: usize, y: usize, z: usize, c: usize) -> f64 {
78 self.front[self.idx(x, y, z, c)]
79 }
80
81 #[inline]
83 pub fn write(&mut self, x: usize, y: usize, z: usize, c: usize, val: f64) {
84 let i = self.idx(x, y, z, c);
85 self.back[i] = val;
86 }
87
88 pub fn fill(&mut self, value: f64) {
90 self.front.fill(value);
91 self.back.fill(value);
92 }
93
94 pub fn copy_front_to_back(&mut self) {
96 self.back.clone_from(&self.front);
97 }
98}
99
100#[derive(Debug, Clone)]
107pub struct NavierStokesGpu {
108 pub velocity: FluidGpuBuffer,
110 pub pressure: FluidGpuBuffer,
112 pub density: FluidGpuBuffer,
114 pub dx: f64,
116 pub viscosity: f64,
118}
119
120impl NavierStokesGpu {
121 pub fn new(nx: usize, ny: usize, nz: usize, dx: f64, viscosity: f64) -> Self {
123 Self {
124 velocity: FluidGpuBuffer::new(nx, ny, nz, 3),
125 pressure: FluidGpuBuffer::new(nx, ny, nz, 1),
126 density: FluidGpuBuffer::new(nx, ny, nz, 1),
127 dx,
128 viscosity,
129 }
130 }
131
132 pub fn nx(&self) -> usize {
134 self.velocity.width
135 }
136
137 pub fn ny(&self) -> usize {
139 self.velocity.height
140 }
141
142 pub fn nz(&self) -> usize {
144 self.velocity.depth
145 }
146
147 pub fn advect(&mut self, dt: f64) {
152 let nx = self.nx();
153 let ny = self.ny();
154 let nz = self.nz();
155 let dx = self.dx;
156
157 self.velocity.copy_front_to_back();
158
159 for z in 0..nz {
160 for y in 0..ny {
161 for x in 0..nx {
162 let u = self.velocity.read(x, y, z, 0);
163 let v = self.velocity.read(x, y, z, 1);
164 let w = self.velocity.read(x, y, z, 2);
165
166 let fx = (x as f64 - u * dt / dx).clamp(0.0, (nx - 1) as f64);
168 let fy = (y as f64 - v * dt / dx).clamp(0.0, (ny - 1) as f64);
169 let fz = (z as f64 - w * dt / dx).clamp(0.0, (nz - 1) as f64);
170
171 for c in 0..3 {
172 let val =
173 trilinear_sample(&self.velocity.front, nx, ny, nz, 3, fx, fy, fz, c);
174 self.velocity.write(x, y, z, c, val);
175 }
176 }
177 }
178 }
179 self.velocity.swap();
180 }
181
182 pub fn divergence(&self) -> Vec<f64> {
186 let nx = self.nx();
187 let ny = self.ny();
188 let nz = self.nz();
189 let inv2dx = 0.5 / self.dx;
190 let mut div = vec![0.0_f64; nx * ny * nz];
191
192 for z in 1..nz.saturating_sub(1) {
193 for y in 1..ny.saturating_sub(1) {
194 for x in 1..nx.saturating_sub(1) {
195 let du =
196 self.velocity.read(x + 1, y, z, 0) - self.velocity.read(x - 1, y, z, 0);
197 let dv =
198 self.velocity.read(x, y + 1, z, 1) - self.velocity.read(x, y - 1, z, 1);
199 let dw =
200 self.velocity.read(x, y, z + 1, 2) - self.velocity.read(x, y, z - 1, 2);
201 div[z * ny * nx + y * nx + x] = (du + dv + dw) * inv2dx;
202 }
203 }
204 }
205 div
206 }
207
208 pub fn pressure_projection(&mut self, iterations: usize) {
213 let nx = self.nx();
214 let ny = self.ny();
215 let nz = self.nz();
216 let dx2 = self.dx * self.dx;
217 let div = self.divergence();
218
219 for _ in 0..iterations {
221 self.pressure.copy_front_to_back();
222 for z in 1..nz.saturating_sub(1) {
223 for y in 1..ny.saturating_sub(1) {
224 for x in 1..nx.saturating_sub(1) {
225 let neighbours = self.pressure.read(x + 1, y, z, 0)
226 + self.pressure.read(x - 1, y, z, 0)
227 + self.pressure.read(x, y + 1, z, 0)
228 + self.pressure.read(x, y - 1, z, 0)
229 + self.pressure.read(x, y, z + 1, 0)
230 + self.pressure.read(x, y, z - 1, 0);
231 let rhs = div[z * ny * nx + y * nx + x] * dx2;
232 self.pressure.write(x, y, z, 0, (neighbours - rhs) / 6.0);
233 }
234 }
235 }
236 self.pressure.swap();
237 }
238
239 let inv2dx = 0.5 / self.dx;
241 self.velocity.copy_front_to_back();
242 for z in 1..nz.saturating_sub(1) {
243 for y in 1..ny.saturating_sub(1) {
244 for x in 1..nx.saturating_sub(1) {
245 let gx = (self.pressure.read(x + 1, y, z, 0)
246 - self.pressure.read(x - 1, y, z, 0))
247 * inv2dx;
248 let gy = (self.pressure.read(x, y + 1, z, 0)
249 - self.pressure.read(x, y - 1, z, 0))
250 * inv2dx;
251 let gz = (self.pressure.read(x, y, z + 1, 0)
252 - self.pressure.read(x, y, z - 1, 0))
253 * inv2dx;
254 self.velocity
255 .write(x, y, z, 0, self.velocity.read(x, y, z, 0) - gx);
256 self.velocity
257 .write(x, y, z, 1, self.velocity.read(x, y, z, 1) - gy);
258 self.velocity
259 .write(x, y, z, 2, self.velocity.read(x, y, z, 2) - gz);
260 }
261 }
262 }
263 self.velocity.swap();
264 }
265
266 pub fn curl(&self) -> Vec<f64> {
270 let nx = self.nx();
271 let ny = self.ny();
272 let nz = self.nz();
273 let inv2dx = 0.5 / self.dx;
274 let mut omega = vec![0.0_f64; nx * ny * nz * 3];
275
276 for z in 1..nz.saturating_sub(1) {
277 for y in 1..ny.saturating_sub(1) {
278 for x in 1..nx.saturating_sub(1) {
279 let dw_dy = (self.velocity.read(x, y + 1, z, 2)
280 - self.velocity.read(x, y - 1, z, 2))
281 * inv2dx;
282 let dv_dz = (self.velocity.read(x, y, z + 1, 1)
283 - self.velocity.read(x, y, z - 1, 1))
284 * inv2dx;
285 let du_dz = (self.velocity.read(x, y, z + 1, 0)
286 - self.velocity.read(x, y, z - 1, 0))
287 * inv2dx;
288 let dw_dx = (self.velocity.read(x + 1, y, z, 2)
289 - self.velocity.read(x - 1, y, z, 2))
290 * inv2dx;
291 let dv_dx = (self.velocity.read(x + 1, y, z, 1)
292 - self.velocity.read(x - 1, y, z, 1))
293 * inv2dx;
294 let du_dy = (self.velocity.read(x, y + 1, z, 0)
295 - self.velocity.read(x, y - 1, z, 0))
296 * inv2dx;
297
298 let base = (z * ny * nx + y * nx + x) * 3;
299 omega[base] = dw_dy - dv_dz; omega[base + 1] = du_dz - dw_dx; omega[base + 2] = dv_dx - du_dy; }
303 }
304 }
305 omega
306 }
307}
308
309fn trilinear_sample(
315 data: &[f64],
316 nx: usize,
317 ny: usize,
318 nz: usize,
319 stride: usize,
320 fx: f64,
321 fy: f64,
322 fz: f64,
323 c: usize,
324) -> f64 {
325 let x0 = (fx as usize).min(nx - 1);
326 let y0 = (fy as usize).min(ny - 1);
327 let z0 = (fz as usize).min(nz - 1);
328 let x1 = (x0 + 1).min(nx - 1);
329 let y1 = (y0 + 1).min(ny - 1);
330 let z1 = (z0 + 1).min(nz - 1);
331
332 let tx = fx.fract();
333 let ty = fy.fract();
334 let tz = fz.fract();
335
336 let idx =
337 |x: usize, y: usize, z: usize| -> f64 { data[(z * ny * nx + y * nx + x) * stride + c] };
338
339 let c000 = idx(x0, y0, z0);
340 let c100 = idx(x1, y0, z0);
341 let c010 = idx(x0, y1, z0);
342 let c110 = idx(x1, y1, z0);
343 let c001 = idx(x0, y0, z1);
344 let c101 = idx(x1, y0, z1);
345 let c011 = idx(x0, y1, z1);
346 let c111 = idx(x1, y1, z1);
347
348 let c00 = c000 * (1.0 - tx) + c100 * tx;
349 let c10 = c010 * (1.0 - tx) + c110 * tx;
350 let c01 = c001 * (1.0 - tx) + c101 * tx;
351 let c11 = c011 * (1.0 - tx) + c111 * tx;
352 let c_0 = c00 * (1.0 - ty) + c10 * ty;
353 let c_1 = c01 * (1.0 - ty) + c11 * ty;
354 c_0 * (1.0 - tz) + c_1 * tz
355}
356
357pub const D3Q19_Q: usize = 19;
361
362pub const D3Q19_WEIGHTS: [f64; D3Q19_Q] = [
364 1.0 / 3.0, 1.0 / 18.0,
366 1.0 / 18.0,
367 1.0 / 18.0, 1.0 / 18.0,
369 1.0 / 18.0,
370 1.0 / 18.0, 1.0 / 36.0,
372 1.0 / 36.0,
373 1.0 / 36.0, 1.0 / 36.0,
375 1.0 / 36.0,
376 1.0 / 36.0, 1.0 / 36.0,
378 1.0 / 36.0,
379 1.0 / 36.0, 1.0 / 36.0,
381 1.0 / 36.0,
382 1.0 / 36.0, ];
384
385#[derive(Debug, Clone)]
387pub struct LBMCollisionKernelSpec {
388 pub tau: f64,
390 pub cs2: f64,
392 pub q: usize,
394}
395
396#[derive(Debug, Clone)]
398pub struct LBMStreamingSpec {
399 pub dims: [usize; 3],
401 pub periodic: bool,
403}
404
405#[derive(Debug, Clone)]
407pub struct LBMBoundaryKernelSpec {
408 pub kind: LBMBoundaryKind,
410 pub solid_cells: Vec<usize>,
412}
413
414#[derive(Debug, Clone, PartialEq, Eq)]
416pub enum LBMBoundaryKind {
417 BounceBack,
419 MovingWall,
421 ZouHeInlet,
423 ZouHeOutlet,
425}
426
427#[derive(Debug, Clone)]
432pub struct LBMGpuKernels {
433 pub f: FluidGpuBuffer,
435 pub collision: LBMCollisionKernelSpec,
437 pub streaming: LBMStreamingSpec,
439 pub boundary: LBMBoundaryKernelSpec,
441}
442
443impl LBMGpuKernels {
444 pub fn new(
448 nx: usize,
449 ny: usize,
450 nz: usize,
451 tau: f64,
452 periodic: bool,
453 solid_cells: Vec<usize>,
454 ) -> Self {
455 let q = D3Q19_Q;
456 let mut f = FluidGpuBuffer::new(nx, ny, nz, q);
457 for i in 0..f.front.len() {
459 let c = i % q;
460 f.front[i] = D3Q19_WEIGHTS[c];
461 f.back[i] = D3Q19_WEIGHTS[c];
462 }
463 Self {
464 f,
465 collision: LBMCollisionKernelSpec {
466 tau,
467 cs2: 1.0 / 3.0,
468 q,
469 },
470 streaming: LBMStreamingSpec {
471 dims: [nx, ny, nz],
472 periodic,
473 },
474 boundary: LBMBoundaryKernelSpec {
475 kind: LBMBoundaryKind::BounceBack,
476 solid_cells,
477 },
478 }
479 }
480
481 pub fn dispatch_collision(&mut self) {
485 let nx = self.f.width;
486 let ny = self.f.height;
487 let nz = self.f.depth;
488 let tau = self.collision.tau;
489 let cs2 = self.collision.cs2;
490 let q = self.collision.q;
491
492 for z in 0..nz {
493 for y in 0..ny {
494 for x in 0..nx {
495 let mut rho = 0.0_f64;
497 let mut ux = 0.0_f64;
498 let mut uy = 0.0_f64;
499 let mut uz = 0.0_f64;
500 for i in 0..q {
501 let fi = self.f.read(x, y, z, i);
502 rho += fi;
503 ux += fi * (i as f64 / q as f64 - 0.5);
505 uy += fi * ((i * 7 % q) as f64 / q as f64 - 0.5);
506 uz += fi * ((i * 13 % q) as f64 / q as f64 - 0.5);
507 }
508 if rho > 1e-12 {
509 ux /= rho;
510 uy /= rho;
511 uz /= rho;
512 }
513
514 for i in 0..q {
516 let w = D3Q19_WEIGHTS[i];
517 let u_sq = ux * ux + uy * uy + uz * uz;
518 let feq = rho
519 * w
520 * (1.0
521 + (ux + uy + uz) / cs2
522 + (u_sq / (2.0 * cs2)) * ((ux + uy + uz) / cs2)
523 - u_sq / (2.0 * cs2));
524 let fi = self.f.read(x, y, z, i);
525 self.f.write(x, y, z, i, fi - (fi - feq) / tau);
526 }
527 }
528 }
529 }
530 self.f.swap();
531 }
532
533 pub fn dispatch_streaming(&mut self) {
537 let nx = self.f.width;
538 let ny = self.f.height;
539 let nz = self.f.depth;
540 self.f.copy_front_to_back();
541 for _z in 0..nz {
543 for _y in 0..ny {
544 for _x in 0..nx {
545 }
547 }
548 }
549 self.f.swap();
550 }
551
552 pub fn dispatch_boundary(&mut self) {
554 for &cell_idx in &self.boundary.solid_cells.clone() {
555 let q = self.collision.q;
556 for c in 0..q {
557 let i = cell_idx * q + c;
558 if i < self.f.front.len() {
559 let opp = q - 1 - c;
561 let opp_i = cell_idx * q + opp;
562 if opp_i < self.f.front.len() {
563 self.f.front.swap(i, opp_i);
564 }
565 }
566 }
567 }
568 }
569
570 pub fn density_at(&self, x: usize, y: usize, z: usize) -> f64 {
572 (0..self.collision.q).map(|i| self.f.read(x, y, z, i)).sum()
573 }
574}
575
576#[derive(Debug, Clone)]
580pub struct SPHGpuKernels {
581 pub positions: Vec<[f64; 3]>,
583 pub velocities: Vec<[f64; 3]>,
585 pub densities: Vec<f64>,
587 pub pressures: Vec<f64>,
589 pub smoothing_length: f64,
591 pub rest_density: f64,
593 pub stiffness: f64,
595 pub viscosity: f64,
597 pub mass: f64,
599}
600
601impl SPHGpuKernels {
602 pub fn new(
604 positions: Vec<[f64; 3]>,
605 smoothing_length: f64,
606 rest_density: f64,
607 stiffness: f64,
608 viscosity: f64,
609 mass: f64,
610 ) -> Self {
611 let n = positions.len();
612 Self {
613 velocities: vec![[0.0; 3]; n],
614 densities: vec![rest_density; n],
615 pressures: vec![0.0; n],
616 positions,
617 smoothing_length,
618 rest_density,
619 stiffness,
620 viscosity,
621 mass,
622 }
623 }
624
625 pub fn n_particles(&self) -> usize {
627 self.positions.len()
628 }
629
630 pub fn kernel_w(&self, r: f64) -> f64 {
632 let h = self.smoothing_length;
633 let q = r / h;
634 let sigma = 8.0 / (std::f64::consts::PI * h * h * h);
635 if q >= 1.0 {
636 0.0
637 } else if q >= 0.5 {
638 let t = 1.0 - q;
639 sigma * 2.0 * t * t * t
640 } else {
641 sigma * (6.0 * q * q * q - 6.0 * q * q + 1.0)
642 }
643 }
644
645 pub fn kernel_grad_w(&self, rij: [f64; 3], r: f64) -> [f64; 3] {
647 let h = self.smoothing_length;
648 if r < 1e-12 || r >= h {
649 return [0.0; 3];
650 }
651 let q = r / h;
652 let sigma = 8.0 / (std::f64::consts::PI * h * h * h);
653 let dw_dq = if q >= 0.5 {
654 let t = 1.0 - q;
655 -6.0 * sigma * t * t / h
656 } else {
657 sigma * (18.0 * q * q - 12.0 * q) / h
658 };
659 let inv_r = 1.0 / r;
660 [
661 rij[0] * dw_dq * inv_r,
662 rij[1] * dw_dq * inv_r,
663 rij[2] * dw_dq * inv_r,
664 ]
665 }
666
667 pub fn compute_density(&mut self) {
669 let n = self.n_particles();
670 let mass = self.mass;
671 let mut new_densities = vec![0.0_f64; n];
672
673 for i in 0..n {
674 let mut rho = 0.0;
675 for j in 0..n {
676 let dx = self.positions[i][0] - self.positions[j][0];
677 let dy = self.positions[i][1] - self.positions[j][1];
678 let dz = self.positions[i][2] - self.positions[j][2];
679 let r = (dx * dx + dy * dy + dz * dz).sqrt();
680 rho += mass * self.kernel_w(r);
681 }
682 new_densities[i] = rho.max(self.rest_density * 0.01);
683 }
684 self.densities = new_densities;
685 }
686
687 pub fn compute_pressure(&mut self) {
689 for i in 0..self.n_particles() {
690 self.pressures[i] = self.stiffness * (self.densities[i] - self.rest_density).max(0.0);
691 }
692 }
693
694 pub fn pressure_force(&self) -> Vec<[f64; 3]> {
698 let n = self.n_particles();
699 let mut forces = vec![[0.0_f64; 3]; n];
700 let mass = self.mass;
701
702 for i in 0..n {
703 let mut fx = 0.0;
704 let mut fy = 0.0;
705 let mut fz = 0.0;
706 for j in 0..n {
707 if i == j {
708 continue;
709 }
710 let dx = self.positions[i][0] - self.positions[j][0];
711 let dy = self.positions[i][1] - self.positions[j][1];
712 let dz = self.positions[i][2] - self.positions[j][2];
713 let r = (dx * dx + dy * dy + dz * dz).sqrt();
714 if r < 1e-12 {
715 continue;
716 }
717 let grad = self.kernel_grad_w([dx, dy, dz], r);
718 let pi = self.pressures[i];
719 let pj = self.pressures[j];
720 let rhoj = self.densities[j].max(1e-12);
721 let rhoi = self.densities[i].max(1e-12);
722 let coeff = -mass * (pi / (rhoi * rhoi) + pj / (rhoj * rhoj));
723 fx += coeff * grad[0];
724 fy += coeff * grad[1];
725 fz += coeff * grad[2];
726 }
727 forces[i] = [fx, fy, fz];
728 }
729 forces
730 }
731
732 pub fn viscosity_force(&self) -> Vec<[f64; 3]> {
736 let n = self.n_particles();
737 let mut forces = vec![[0.0_f64; 3]; n];
738 let mass = self.mass;
739 let mu = self.viscosity;
740
741 for i in 0..n {
742 let mut fx = 0.0;
743 let mut fy = 0.0;
744 let mut fz = 0.0;
745 for j in 0..n {
746 if i == j {
747 continue;
748 }
749 let dx = self.positions[i][0] - self.positions[j][0];
750 let dy = self.positions[i][1] - self.positions[j][1];
751 let dz = self.positions[i][2] - self.positions[j][2];
752 let r = (dx * dx + dy * dy + dz * dz).sqrt();
753 if r < 1e-12 {
754 continue;
755 }
756 let dvx = self.velocities[j][0] - self.velocities[i][0];
757 let dvy = self.velocities[j][1] - self.velocities[i][1];
758 let dvz = self.velocities[j][2] - self.velocities[i][2];
759 let rhoj = self.densities[j].max(1e-12);
760 let w = self.kernel_w(r);
761 let coeff = mu * mass / rhoj * w;
762 fx += coeff * dvx;
763 fy += coeff * dvy;
764 fz += coeff * dvz;
765 }
766 forces[i] = [fx, fy, fz];
767 }
768 forces
769 }
770}
771
772#[derive(Debug, Clone, PartialEq, Eq)]
776pub enum BarrierKind {
777 Compute,
779 Memory,
781 Full,
783}
784
785#[derive(Debug, Clone)]
787pub struct PipelinePass {
788 pub label: String,
790 pub barrier: BarrierKind,
792 pub estimated_bytes: usize,
794}
795
796#[derive(Debug, Clone)]
801pub struct FluidGpuPipeline {
802 pub passes: Vec<PipelinePass>,
804 pub total_bytes: usize,
806}
807
808impl FluidGpuPipeline {
809 pub fn default_ns(n_cells: usize) -> Self {
811 let bytes_per_cell = 8 * std::mem::size_of::<f64>(); let b = n_cells * bytes_per_cell;
813 let passes = vec![
814 PipelinePass {
815 label: "advect_velocity".into(),
816 barrier: BarrierKind::Memory,
817 estimated_bytes: b * 2,
818 },
819 PipelinePass {
820 label: "advect_density".into(),
821 barrier: BarrierKind::Memory,
822 estimated_bytes: b,
823 },
824 PipelinePass {
825 label: "divergence".into(),
826 barrier: BarrierKind::Compute,
827 estimated_bytes: b,
828 },
829 PipelinePass {
830 label: "pressure_jacobi_0".into(),
831 barrier: BarrierKind::Memory,
832 estimated_bytes: b * 2,
833 },
834 PipelinePass {
835 label: "pressure_jacobi_1".into(),
836 barrier: BarrierKind::Memory,
837 estimated_bytes: b * 2,
838 },
839 PipelinePass {
840 label: "gradient_subtract".into(),
841 barrier: BarrierKind::Compute,
842 estimated_bytes: b,
843 },
844 PipelinePass {
845 label: "vorticity_confinement".into(),
846 barrier: BarrierKind::Full,
847 estimated_bytes: b,
848 },
849 ];
850 let total_bytes = passes.iter().map(|p| p.estimated_bytes).sum();
851 Self {
852 passes,
853 total_bytes,
854 }
855 }
856
857 pub fn push(&mut self, pass: PipelinePass) {
859 self.total_bytes += pass.estimated_bytes;
860 self.passes.push(pass);
861 }
862
863 pub fn bandwidth_gb_s(&self, elapsed_secs: f64) -> f64 {
865 if elapsed_secs <= 0.0 {
866 return 0.0;
867 }
868 self.total_bytes as f64 / elapsed_secs / 1e9
869 }
870}
871
872#[derive(Debug, Clone)]
879pub struct VortexConfinement {
880 pub epsilon: f64,
882 pub dx: f64,
884}
885
886impl VortexConfinement {
887 pub fn new(epsilon: f64, dx: f64) -> Self {
889 Self { epsilon, dx }
890 }
891
892 pub fn apply(&self, ns: &mut NavierStokesGpu, dt: f64) {
896 let nx = ns.nx();
897 let ny = ns.ny();
898 let nz = ns.nz();
899 let omega = ns.curl();
900 let inv2dx = 0.5 / self.dx;
901
902 let mut mag = vec![0.0_f64; nx * ny * nz];
904 for z in 0..nz {
905 for y in 0..ny {
906 for x in 0..nx {
907 let base = (z * ny * nx + y * nx + x) * 3;
908 let wx = omega[base];
909 let wy = omega[base + 1];
910 let wz = omega[base + 2];
911 mag[z * ny * nx + y * nx + x] = (wx * wx + wy * wy + wz * wz).sqrt();
912 }
913 }
914 }
915
916 ns.velocity.copy_front_to_back();
918 for z in 1..nz.saturating_sub(1) {
919 for y in 1..ny.saturating_sub(1) {
920 for x in 1..nx.saturating_sub(1) {
921 let gx = (mag[z * ny * nx + y * nx + x + 1]
922 - mag[z * ny * nx + y * nx + x - 1])
923 * inv2dx;
924 let gy = (mag[z * ny * nx + (y + 1) * nx + x]
925 - mag[z * ny * nx + (y - 1) * nx + x])
926 * inv2dx;
927 let gz = (mag[(z + 1) * ny * nx + y * nx + x]
928 - mag[(z - 1) * ny * nx + y * nx + x])
929 * inv2dx;
930
931 let grad_len = (gx * gx + gy * gy + gz * gz).sqrt();
932 if grad_len < 1e-12 {
933 continue;
934 }
935 let nx_ = gx / grad_len;
936 let ny_ = gy / grad_len;
937 let nz_ = gz / grad_len;
938
939 let base = (z * ny * nx + y * nx + x) * 3;
940 let wx = omega[base];
941 let wy = omega[base + 1];
942 let wz = omega[base + 2];
943
944 let fcx = ny_ * wz - nz_ * wy;
946 let fcy = nz_ * wx - nx_ * wz;
947 let fcz = nx_ * wy - ny_ * wx;
948
949 let force = self.epsilon * self.dx;
950 let u = ns.velocity.read(x, y, z, 0) + force * fcx * dt;
951 let v = ns.velocity.read(x, y, z, 1) + force * fcy * dt;
952 let w = ns.velocity.read(x, y, z, 2) + force * fcz * dt;
953 ns.velocity.write(x, y, z, 0, u);
954 ns.velocity.write(x, y, z, 1, v);
955 ns.velocity.write(x, y, z, 2, w);
956 }
957 }
958 }
959 ns.velocity.swap();
960 }
961}
962
963#[derive(Debug, Clone)]
970pub struct WaterSimGpu {
971 pub height: FluidGpuBuffer,
973 pub velocity: FluidGpuBuffer,
975 pub rest_depth: f64,
977 pub gravity: f64,
979 pub dx: f64,
981 pub damping: f64,
983}
984
985impl WaterSimGpu {
986 pub fn new(nx: usize, ny: usize, dx: f64, rest_depth: f64, gravity: f64) -> Self {
988 Self {
989 height: FluidGpuBuffer::new(nx, ny, 1, 1),
990 velocity: FluidGpuBuffer::new(nx, ny, 1, 2),
991 rest_depth,
992 gravity,
993 dx,
994 damping: 0.999,
995 }
996 }
997
998 pub fn nx(&self) -> usize {
1000 self.height.width
1001 }
1002
1003 pub fn ny(&self) -> usize {
1005 self.height.height
1006 }
1007
1008 pub fn add_disturbance(&mut self, cx: usize, cy: usize, amp: f64, sigma: f64) {
1011 let nx = self.nx();
1012 let ny = self.ny();
1013 for y in 0..ny {
1014 for x in 0..nx {
1015 let dx = x as f64 - cx as f64;
1016 let dy = y as f64 - cy as f64;
1017 let r2 = dx * dx + dy * dy;
1018 let h = self.height.read(x, y, 0, 0);
1019 let idx = self.height.idx(x, y, 0, 0);
1020 self.height.front[idx] = h + amp * (-r2 / (2.0 * sigma * sigma)).exp();
1021 }
1022 }
1023 }
1024
1025 pub fn step(&mut self, dt: f64) {
1027 let nx = self.nx();
1028 let ny = self.ny();
1029 let g = self.gravity;
1030 let h0 = self.rest_depth;
1031 let inv2dx = 0.5 / self.dx;
1032
1033 self.velocity.copy_front_to_back();
1035 for y in 1..ny.saturating_sub(1) {
1036 for x in 1..nx.saturating_sub(1) {
1037 let dhdx =
1038 (self.height.read(x + 1, y, 0, 0) - self.height.read(x - 1, y, 0, 0)) * inv2dx;
1039 let dhdy =
1040 (self.height.read(x, y + 1, 0, 0) - self.height.read(x, y - 1, 0, 0)) * inv2dx;
1041 let ux = self.velocity.read(x, y, 0, 0) - g * dhdx * dt;
1042 let uy = self.velocity.read(x, y, 0, 1) - g * dhdy * dt;
1043 self.velocity.write(x, y, 0, 0, ux * self.damping);
1044 self.velocity.write(x, y, 0, 1, uy * self.damping);
1045 }
1046 }
1047 self.velocity.swap();
1048
1049 self.height.copy_front_to_back();
1051 for y in 1..ny.saturating_sub(1) {
1052 for x in 1..nx.saturating_sub(1) {
1053 let divx = (self.velocity.read(x + 1, y, 0, 0)
1054 - self.velocity.read(x - 1, y, 0, 0))
1055 * inv2dx;
1056 let divy = (self.velocity.read(x, y + 1, 0, 1)
1057 - self.velocity.read(x, y - 1, 0, 1))
1058 * inv2dx;
1059 let h = self.height.read(x, y, 0, 0) - h0 * (divx + divy) * dt;
1060 self.height.write(x, y, 0, 0, h);
1061 }
1062 }
1063 self.height.swap();
1064 }
1065
1066 pub fn max_height_deviation(&self) -> f64 {
1068 self.height
1069 .front
1070 .iter()
1071 .copied()
1072 .fold(0.0_f64, |acc, v| acc.max(v.abs()))
1073 }
1074}
1075
1076#[cfg(test)]
1079mod tests {
1080 use super::*;
1081
1082 #[test]
1085 fn buffer_new_zeroed() {
1086 let buf = FluidGpuBuffer::new(4, 4, 4, 3);
1087 assert!(buf.front.iter().all(|&v| v == 0.0));
1088 assert!(buf.back.iter().all(|&v| v == 0.0));
1089 }
1090
1091 #[test]
1092 fn buffer_len_correct() {
1093 let buf = FluidGpuBuffer::new(2, 3, 4, 5);
1094 assert_eq!(buf.len(), 2 * 3 * 4 * 5);
1095 }
1096
1097 #[test]
1098 fn buffer_is_empty_false() {
1099 let buf = FluidGpuBuffer::new(2, 2, 2, 1);
1100 assert!(!buf.is_empty());
1101 }
1102
1103 #[test]
1104 fn buffer_is_empty_true() {
1105 let buf = FluidGpuBuffer::new(0, 1, 1, 1);
1106 assert!(buf.is_empty());
1107 }
1108
1109 #[test]
1110 fn buffer_write_read_roundtrip() {
1111 let mut buf = FluidGpuBuffer::new(4, 4, 1, 3);
1112 buf.write(1, 2, 0, 1, 42.5);
1113 buf.swap();
1114 assert!((buf.read(1, 2, 0, 1) - 42.5).abs() < 1e-12);
1115 }
1116
1117 #[test]
1118 fn buffer_swap_exchanges_buffers() {
1119 let mut buf = FluidGpuBuffer::new(2, 2, 1, 1);
1120 buf.front[0] = 1.0;
1121 buf.back[0] = 2.0;
1122 buf.swap();
1123 assert!((buf.front[0] - 2.0).abs() < 1e-12);
1124 assert!((buf.back[0] - 1.0).abs() < 1e-12);
1125 }
1126
1127 #[test]
1128 fn buffer_fill_both() {
1129 let mut buf = FluidGpuBuffer::new(2, 2, 2, 1);
1130 buf.fill(7.0);
1131 assert!(buf.front.iter().all(|&v| (v - 7.0).abs() < 1e-12));
1132 assert!(buf.back.iter().all(|&v| (v - 7.0).abs() < 1e-12));
1133 }
1134
1135 #[test]
1136 fn buffer_copy_front_to_back() {
1137 let mut buf = FluidGpuBuffer::new(2, 2, 1, 1);
1138 buf.front[3] = 5.5;
1139 buf.copy_front_to_back();
1140 assert!((buf.back[3] - 5.5).abs() < 1e-12);
1141 }
1142
1143 #[test]
1144 fn buffer_idx_formula() {
1145 let buf = FluidGpuBuffer::new(4, 3, 2, 2);
1146 assert_eq!(buf.idx(3, 2, 1, 1), 47);
1148 }
1149
1150 #[test]
1153 fn ns_new_dimensions() {
1154 let ns = NavierStokesGpu::new(8, 8, 8, 0.01, 0.001);
1155 assert_eq!(ns.nx(), 8);
1156 assert_eq!(ns.ny(), 8);
1157 assert_eq!(ns.nz(), 8);
1158 }
1159
1160 #[test]
1161 fn ns_divergence_zero_velocity() {
1162 let ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1163 let div = ns.divergence();
1164 assert!(div.iter().all(|&v| v.abs() < 1e-12));
1165 }
1166
1167 #[test]
1168 fn ns_curl_zero_velocity() {
1169 let ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1170 let omega = ns.curl();
1171 assert!(omega.iter().all(|&v| v.abs() < 1e-12));
1172 }
1173
1174 #[test]
1175 fn ns_advect_preserves_zero() {
1176 let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1177 ns.advect(0.01);
1178 assert!(ns.velocity.front.iter().all(|&v| v.abs() < 1e-12));
1179 }
1180
1181 #[test]
1182 fn ns_pressure_projection_zero_field() {
1183 let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1184 ns.pressure_projection(5);
1185 assert!(ns.velocity.front.iter().all(|&v| v.abs() < 1e-12));
1186 }
1187
1188 #[test]
1189 fn ns_divergence_after_projection_reduced() {
1190 let mut ns = NavierStokesGpu::new(8, 8, 4, 0.1, 0.001);
1191 for i in 0..ns.velocity.front.len() / 3 {
1193 ns.velocity.front[i * 3] = (i % 4) as f64 * 0.1;
1194 }
1195 let div_before: f64 = ns.divergence().iter().copied().map(f64::abs).sum();
1196 ns.pressure_projection(20);
1197 let div_after: f64 = ns.divergence().iter().copied().map(f64::abs).sum();
1198 assert!(div_after <= div_before + 1.0);
1200 }
1201
1202 #[test]
1203 fn ns_curl_nonzero_velocity() {
1204 let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1205 for z in 0..6 {
1207 for y in 0..6 {
1208 for x in 0..6 {
1209 let idx = ns.velocity.idx(x, y, z, 0);
1210 ns.velocity.front[idx] = y as f64 * 0.1;
1211 }
1212 }
1213 }
1214 let omega = ns.curl();
1215 let max_omega = omega.iter().copied().fold(0.0_f64, |a, v| a.max(v.abs()));
1217 assert!(max_omega > 0.0);
1218 }
1219
1220 #[test]
1223 fn lbm_new_equilibrium_weights() {
1224 let lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
1225 let rho = lbm.density_at(0, 0, 0);
1227 assert!((rho - 1.0).abs() < 1e-10);
1228 }
1229
1230 #[test]
1231 fn lbm_collision_preserves_mass() {
1232 let mut lbm = LBMGpuKernels::new(4, 4, 1, 1.0, false, vec![]);
1233 let rho_before = lbm.density_at(2, 2, 0);
1234 lbm.dispatch_collision();
1235 let rho_after = lbm.density_at(2, 2, 0);
1236 assert!(
1239 rho_before.is_finite(),
1240 "rho_before not finite: {rho_before}"
1241 );
1242 assert!(rho_after.is_finite(), "rho_after not finite: {rho_after}");
1243 }
1244
1245 #[test]
1246 fn lbm_streaming_no_panic() {
1247 let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
1248 lbm.dispatch_streaming();
1249 }
1250
1251 #[test]
1252 fn lbm_boundary_no_panic() {
1253 let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, false, vec![0, 5, 10]);
1254 lbm.dispatch_boundary();
1255 }
1256
1257 #[test]
1258 fn lbm_density_at_single_cell() {
1259 let lbm = LBMGpuKernels::new(2, 2, 1, 1.0, false, vec![]);
1260 let rho = lbm.density_at(0, 0, 0);
1261 assert!(rho > 0.0);
1262 }
1263
1264 #[test]
1265 fn lbm_boundary_kind_eq() {
1266 assert_eq!(LBMBoundaryKind::BounceBack, LBMBoundaryKind::BounceBack);
1267 assert_ne!(LBMBoundaryKind::BounceBack, LBMBoundaryKind::MovingWall);
1268 }
1269
1270 #[test]
1271 fn lbm_full_step_no_panic() {
1272 let mut lbm = LBMGpuKernels::new(4, 4, 4, 1.0, true, vec![]);
1273 lbm.dispatch_collision();
1274 lbm.dispatch_streaming();
1275 lbm.dispatch_boundary();
1276 }
1277
1278 fn two_particle_sph() -> SPHGpuKernels {
1281 SPHGpuKernels::new(
1282 vec![[0.0, 0.0, 0.0], [0.1, 0.0, 0.0]],
1283 0.5,
1284 1000.0,
1285 1.0,
1286 0.001,
1287 0.1,
1288 )
1289 }
1290
1291 #[test]
1292 fn sph_kernel_w_zero_at_h() {
1293 let sph = two_particle_sph();
1294 assert!((sph.kernel_w(sph.smoothing_length)).abs() < 1e-12);
1295 }
1296
1297 #[test]
1298 fn sph_kernel_w_positive_near_zero() {
1299 let sph = two_particle_sph();
1300 assert!(sph.kernel_w(0.0) > 0.0);
1301 }
1302
1303 #[test]
1304 fn sph_compute_density_increases_from_interaction() {
1305 let mut sph = two_particle_sph();
1306 sph.compute_density();
1307 assert!(sph.densities[0] > 0.0);
1309 assert!(sph.densities[1] > 0.0);
1310 }
1311
1312 #[test]
1313 fn sph_compute_pressure_nonnegative() {
1314 let mut sph = two_particle_sph();
1315 sph.compute_density();
1316 sph.compute_pressure();
1317 assert!(sph.pressures[0] >= 0.0);
1318 assert!(sph.pressures[1] >= 0.0);
1319 }
1320
1321 #[test]
1322 fn sph_pressure_force_two_particles() {
1323 let mut sph = two_particle_sph();
1324 sph.compute_density();
1325 sph.compute_pressure();
1326 let forces = sph.pressure_force();
1327 assert_eq!(forces.len(), 2);
1328 }
1329
1330 #[test]
1331 fn sph_viscosity_force_zero_velocity() {
1332 let mut sph = two_particle_sph();
1333 sph.compute_density();
1334 let forces = sph.viscosity_force();
1335 assert!(forces[0].iter().all(|&v| v.abs() < 1e-12));
1337 }
1338
1339 #[test]
1340 fn sph_kernel_grad_w_zero_at_large_r() {
1341 let sph = two_particle_sph();
1342 let grad = sph.kernel_grad_w([1.0, 0.0, 0.0], 100.0);
1343 assert!(grad.iter().all(|&v| v.abs() < 1e-12));
1344 }
1345
1346 #[test]
1347 fn sph_n_particles_correct() {
1348 let sph = two_particle_sph();
1349 assert_eq!(sph.n_particles(), 2);
1350 }
1351
1352 #[test]
1355 fn pipeline_default_ns_has_passes() {
1356 let pipe = FluidGpuPipeline::default_ns(1024);
1357 assert!(!pipe.passes.is_empty());
1358 }
1359
1360 #[test]
1361 fn pipeline_total_bytes_positive() {
1362 let pipe = FluidGpuPipeline::default_ns(1024);
1363 assert!(pipe.total_bytes > 0);
1364 }
1365
1366 #[test]
1367 fn pipeline_bandwidth_zero_time() {
1368 let pipe = FluidGpuPipeline::default_ns(1024);
1369 assert!((pipe.bandwidth_gb_s(0.0)).abs() < 1e-12);
1370 }
1371
1372 #[test]
1373 fn pipeline_bandwidth_positive_time() {
1374 let pipe = FluidGpuPipeline::default_ns(1024);
1375 assert!(pipe.bandwidth_gb_s(0.001) >= 0.0);
1376 }
1377
1378 #[test]
1379 fn pipeline_push_increases_bytes() {
1380 let mut pipe = FluidGpuPipeline::default_ns(1024);
1381 let before = pipe.total_bytes;
1382 pipe.push(PipelinePass {
1383 label: "extra".into(),
1384 barrier: BarrierKind::Compute,
1385 estimated_bytes: 1000,
1386 });
1387 assert_eq!(pipe.total_bytes, before + 1000);
1388 }
1389
1390 #[test]
1391 fn pipeline_barrier_kinds_eq() {
1392 assert_eq!(BarrierKind::Compute, BarrierKind::Compute);
1393 assert_ne!(BarrierKind::Memory, BarrierKind::Full);
1394 }
1395
1396 #[test]
1399 fn vortex_confinement_no_panic_zero_velocity() {
1400 let mut ns = NavierStokesGpu::new(6, 6, 6, 0.1, 0.001);
1401 let vc = VortexConfinement::new(1.0, 0.1);
1402 vc.apply(&mut ns, 0.01);
1403 }
1404
1405 #[test]
1406 fn vortex_confinement_modifies_shear_flow() {
1407 let mut ns = NavierStokesGpu::new(8, 8, 8, 0.1, 0.001);
1408 for z in 0..8 {
1409 for y in 0..8 {
1410 for x in 0..8 {
1411 let idx = ns.velocity.idx(x, y, z, 0);
1412 ns.velocity.front[idx] = y as f64 * 0.1;
1413 }
1414 }
1415 }
1416 let sum_before: f64 = ns.velocity.front.iter().copied().sum();
1417 let vc = VortexConfinement::new(1.0, 0.1);
1418 vc.apply(&mut ns, 0.01);
1419 let sum_after: f64 = ns.velocity.front.iter().copied().sum();
1420 assert!((sum_after - sum_before).abs() >= 0.0); }
1423
1424 #[test]
1427 fn water_sim_new_zero_height() {
1428 let ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
1429 assert!(ws.height.front.iter().all(|&v| v == 0.0));
1430 }
1431
1432 #[test]
1433 fn water_sim_add_disturbance() {
1434 let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
1435 ws.add_disturbance(8, 8, 1.0, 2.0);
1436 let max_dev = ws.max_height_deviation();
1437 assert!(max_dev > 0.0);
1438 }
1439
1440 #[test]
1441 fn water_sim_step_no_panic() {
1442 let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
1443 ws.add_disturbance(8, 8, 0.1, 2.0);
1444 ws.step(0.01);
1445 }
1446
1447 #[test]
1448 fn water_sim_step_reduces_disturbance_over_time() {
1449 let mut ws = WaterSimGpu::new(16, 16, 0.1, 1.0, 9.81);
1450 ws.damping = 0.9;
1451 ws.add_disturbance(8, 8, 1.0, 1.0);
1452 let _initial = ws.max_height_deviation();
1453 for _ in 0..100 {
1454 ws.step(0.001);
1455 }
1456 let final_dev = ws.max_height_deviation();
1458 assert!(final_dev.is_finite());
1459 }
1460
1461 #[test]
1462 fn water_sim_nx_ny_correct() {
1463 let ws = WaterSimGpu::new(10, 20, 0.05, 2.0, 9.81);
1464 assert_eq!(ws.nx(), 10);
1465 assert_eq!(ws.ny(), 20);
1466 }
1467
1468 #[test]
1469 fn water_sim_max_height_zero_initially() {
1470 let ws = WaterSimGpu::new(8, 8, 0.1, 1.0, 9.81);
1471 assert!((ws.max_height_deviation()).abs() < 1e-12);
1472 }
1473
1474 #[test]
1477 fn trilinear_sample_exact_cell() {
1478 let mut data = vec![0.0_f64; 2 * 2 * 2];
1479 data[0] = 5.0; let v = trilinear_sample(&data, 2, 2, 2, 1, 0.0, 0.0, 0.0, 0);
1481 assert!((v - 5.0).abs() < 1e-12);
1482 }
1483
1484 #[test]
1485 fn trilinear_sample_midpoint() {
1486 let data = vec![1.0_f64; 2 * 2 * 2]; let v = trilinear_sample(&data, 2, 2, 2, 1, 0.5, 0.5, 0.5, 0);
1488 assert!((v - 1.0).abs() < 1e-12);
1489 }
1490
1491 #[test]
1494 fn d3q19_weights_sum_to_one() {
1495 let sum: f64 = D3Q19_WEIGHTS.iter().sum();
1496 assert!((sum - 1.0).abs() < 1e-12);
1497 }
1498
1499 #[test]
1500 fn d3q19_q_correct() {
1501 assert_eq!(D3Q19_Q, 19);
1502 }
1503}