1#![allow(clippy::ptr_arg)]
2#![allow(dead_code)]
8
9use crate::compute::ComputeKernel;
10use std::collections::HashMap;
11
12pub struct AabbSortKernel;
23
24impl ComputeKernel for AabbSortKernel {
25 fn name(&self) -> &str {
26 "AabbSortKernel"
27 }
28
29 fn execute(&self, inputs: &[&[f64]], outputs: &mut [Vec<f64>], _work_size: usize) {
30 if inputs.is_empty() || outputs.is_empty() {
31 return;
32 }
33 let aabbs = inputs[0];
34 let n = aabbs.len() / 6;
35 let mut indices: Vec<usize> = (0..n).collect();
37 indices.sort_by(|&a, &b| {
38 let ax = aabbs[a * 6];
39 let bx = aabbs[b * 6];
40 ax.partial_cmp(&bx).unwrap_or(std::cmp::Ordering::Equal)
41 });
42 outputs[0] = indices.iter().map(|&i| i as f64).collect();
43 }
44}
45
46pub struct AabbOverlapKernel;
53
54impl AabbOverlapKernel {
55 #[inline]
56 fn overlaps(a: &[f64], b: &[f64]) -> bool {
57 a[0] <= b[1] && a[1] >= b[0] && a[2] <= b[3] && a[3] >= b[2] && a[4] <= b[5] && a[5] >= b[4] }
62}
63
64#[allow(clippy::needless_range_loop)]
65impl ComputeKernel for AabbOverlapKernel {
66 fn name(&self) -> &str {
67 "AabbOverlapKernel"
68 }
69
70 fn execute(&self, inputs: &[&[f64]], outputs: &mut [Vec<f64>], _work_size: usize) {
71 if inputs.is_empty() || outputs.is_empty() {
72 return;
73 }
74 let aabbs = inputs[0];
75 let n = aabbs.len() / 6;
76 let mut pairs = Vec::new();
77 for i in 0..n {
78 for j in (i + 1)..n {
79 if Self::overlaps(&aabbs[i * 6..(i + 1) * 6], &aabbs[j * 6..(j + 1) * 6]) {
80 pairs.push(i as f64);
81 pairs.push(j as f64);
82 }
83 }
84 }
85 outputs[0] = pairs;
86 }
87}
88
89#[derive(Debug, Clone, Copy, PartialEq)]
95pub struct AabbGpu {
96 pub min: [f32; 3],
98 pub max: [f32; 3],
100 pub body_id: u32,
102}
103
104impl AabbGpu {
105 pub fn new(min: [f32; 3], max: [f32; 3], body_id: u32) -> Self {
107 Self { min, max, body_id }
108 }
109
110 #[inline]
112 pub fn overlaps(&self, other: &AabbGpu) -> bool {
113 self.min[0] <= other.max[0]
114 && self.max[0] >= other.min[0]
115 && self.min[1] <= other.max[1]
116 && self.max[1] >= other.min[1]
117 && self.min[2] <= other.max[2]
118 && self.max[2] >= other.min[2]
119 }
120}
121
122pub struct SortAndSweepGpu;
131
132impl SortAndSweepGpu {
133 pub fn detect_pairs(aabbs: &[AabbGpu]) -> Vec<(u32, u32)> {
139 if aabbs.is_empty() {
140 return Vec::new();
141 }
142
143 let mut sorted: Vec<&AabbGpu> = aabbs.iter().collect();
145 sorted.sort_by(|a, b| {
146 a.min[0]
147 .partial_cmp(&b.min[0])
148 .unwrap_or(std::cmp::Ordering::Equal)
149 });
150
151 let n = sorted.len();
152 let mut pairs = Vec::new();
153
154 for i in 0..n {
155 for j in (i + 1)..n {
156 if sorted[j].min[0] > sorted[i].max[0] {
158 break;
159 }
160 if sorted[i].overlaps(sorted[j]) {
161 let a = sorted[i].body_id;
162 let b = sorted[j].body_id;
163 let pair = if a < b { (a, b) } else { (b, a) };
164 pairs.push(pair);
165 }
166 }
167 }
168
169 pairs
170 }
171}
172
173#[derive(Debug, Clone)]
179pub struct UniformGridGpu {
180 pub cell_size: f32,
182 pub origin: [f32; 3],
184 pub dims: [u32; 3],
186}
187
188impl UniformGridGpu {
189 pub fn new(cell_size: f32, origin: [f32; 3], dims: [u32; 3]) -> Self {
191 Self {
192 cell_size,
193 origin,
194 dims,
195 }
196 }
197
198 pub fn cell_of(&self, pos: [f32; 3]) -> [i32; 3] {
200 [
201 ((pos[0] - self.origin[0]) / self.cell_size).floor() as i32,
202 ((pos[1] - self.origin[1]) / self.cell_size).floor() as i32,
203 ((pos[2] - self.origin[2]) / self.cell_size).floor() as i32,
204 ]
205 }
206
207 fn cell_key(ix: i32, iy: i32, iz: i32) -> u64 {
209 let x = (ix as i16) as u64 & 0xFFFF;
211 let y = (iy as i16) as u64 & 0xFFFF;
212 let z = (iz as i16) as u64 & 0xFFFF;
213 (z << 32) | (y << 16) | x
214 }
215
216 pub fn insert_aabbs(&self, aabbs: &[AabbGpu]) -> HashMap<u64, Vec<u32>> {
221 let mut map: HashMap<u64, Vec<u32>> = HashMap::new();
222
223 for aabb in aabbs {
224 let min_cell = self.cell_of(aabb.min);
225 let max_cell = self.cell_of(aabb.max);
226
227 for iz in min_cell[2]..=max_cell[2] {
228 for iy in min_cell[1]..=max_cell[1] {
229 for ix in min_cell[0]..=max_cell[0] {
230 let key = Self::cell_key(ix, iy, iz);
231 map.entry(key).or_default().push(aabb.body_id);
232 }
233 }
234 }
235 }
236
237 map
238 }
239
240 pub fn query_pairs(&self, aabbs: &[AabbGpu]) -> Vec<(u32, u32)> {
244 let map = self.insert_aabbs(aabbs);
245
246 let mut seen = std::collections::HashSet::new();
247 let mut pairs = Vec::new();
248
249 for body_list in map.values() {
250 let n = body_list.len();
251 for i in 0..n {
252 for j in (i + 1)..n {
253 let a = body_list[i];
254 let b = body_list[j];
255 let pair = if a < b { (a, b) } else { (b, a) };
256 if seen.insert(pair) {
257 if let (Some(aa), Some(bb)) = (
259 aabbs.iter().find(|x| x.body_id == pair.0),
260 aabbs.iter().find(|x| x.body_id == pair.1),
261 ) && aa.overlaps(bb)
262 {
263 pairs.push(pair);
264 }
265 }
266 }
267 }
268 }
269
270 pairs.sort();
271 pairs
272 }
273}
274
275pub fn morton_code(x: u32, y: u32, z: u32) -> u64 {
284 spread_bits(x as u64) | (spread_bits(y as u64) << 1) | (spread_bits(z as u64) << 2)
285}
286
287#[inline]
289fn spread_bits(mut v: u64) -> u64 {
290 v &= 0x1fffff; v = (v | (v << 32)) & 0x1f00000000ffff;
292 v = (v | (v << 16)) & 0x1f0000ff0000ff;
293 v = (v | (v << 8)) & 0x100f00f00f00f00f;
294 v = (v | (v << 4)) & 0x10c30c30c30c30c3;
295 v = (v | (v << 2)) & 0x1249249249249249;
296 v
297}
298
299#[allow(clippy::needless_range_loop)]
308pub fn sort_and_sweep_flat(aabbs: &[f64]) -> Vec<(usize, usize)> {
309 let n = aabbs.len() / 6;
310 if n == 0 {
311 return Vec::new();
312 }
313
314 let mut order: Vec<usize> = (0..n).collect();
316 order.sort_by(|&a, &b| {
317 let ax = aabbs[a * 6];
318 let bx = aabbs[b * 6];
319 ax.partial_cmp(&bx).unwrap_or(std::cmp::Ordering::Equal)
320 });
321
322 let mut pairs = Vec::new();
323 for i in 0..n {
324 let si = order[i];
325 let max_x_i = aabbs[si * 6 + 1];
326 for j in (i + 1)..n {
327 let sj = order[j];
328 if aabbs[sj * 6] > max_x_i {
329 break; }
331 let ai = &aabbs[si * 6..(si + 1) * 6];
333 let aj = &aabbs[sj * 6..(sj + 1) * 6];
334 if ai[0] <= aj[1]
335 && ai[1] >= aj[0]
336 && ai[2] <= aj[3]
337 && ai[3] >= aj[2]
338 && ai[4] <= aj[5]
339 && ai[5] >= aj[4]
340 {
341 let pair = if si < sj { (si, sj) } else { (sj, si) };
342 pairs.push(pair);
343 }
344 }
345 }
346 pairs.sort();
347 pairs.dedup();
348 pairs
349}
350
351pub fn assign_to_grid_cells(aabbs: &[f64], cell_size: f64, origin: [f64; 3]) -> Vec<(u64, usize)> {
359 let n = aabbs.len() / 6;
360 let mut result = Vec::new();
361 let pack = |ix: i64, iy: i64, iz: i64| -> u64 {
362 let x = (ix as i16) as u64 & 0xFFFF;
363 let y = (iy as i16) as u64 & 0xFFFF;
364 let z = (iz as i16) as u64 & 0xFFFF;
365 (z << 32) | (y << 16) | x
366 };
367 for i in 0..n {
368 let a = &aabbs[i * 6..(i + 1) * 6];
369 let ix0 = ((a[0] - origin[0]) / cell_size).floor() as i64;
370 let ix1 = ((a[1] - origin[0]) / cell_size).floor() as i64;
371 let iy0 = ((a[2] - origin[1]) / cell_size).floor() as i64;
372 let iy1 = ((a[3] - origin[1]) / cell_size).floor() as i64;
373 let iz0 = ((a[4] - origin[2]) / cell_size).floor() as i64;
374 let iz1 = ((a[5] - origin[2]) / cell_size).floor() as i64;
375 for iz in iz0..=iz1 {
376 for iy in iy0..=iy1 {
377 for ix in ix0..=ix1 {
378 result.push((pack(ix, iy, iz), i));
379 }
380 }
381 }
382 }
383 result
384}
385
386pub fn pairs_from_grid_assignments(assignments: &[(u64, usize)]) -> Vec<(usize, usize)> {
390 let mut by_cell: HashMap<u64, Vec<usize>> = HashMap::new();
391 for &(key, idx) in assignments {
392 by_cell.entry(key).or_default().push(idx);
393 }
394 let mut seen = std::collections::HashSet::new();
395 let mut pairs = Vec::new();
396 for bodies in by_cell.values() {
397 let n = bodies.len();
398 for i in 0..n {
399 for j in (i + 1)..n {
400 let a = bodies[i];
401 let b = bodies[j];
402 let p = if a < b { (a, b) } else { (b, a) };
403 if seen.insert(p) {
404 pairs.push(p);
405 }
406 }
407 }
408 }
409 pairs.sort();
410 pairs
411}
412
413pub fn morton_key_for_aabb(aabb: &AabbGpu, cell_size: f32, origin: [f32; 3]) -> u64 {
421 let cx = ((aabb.min[0] + aabb.max[0]) * 0.5 - origin[0]) / cell_size;
422 let cy = ((aabb.min[1] + aabb.max[1]) * 0.5 - origin[1]) / cell_size;
423 let cz = ((aabb.min[2] + aabb.max[2]) * 0.5 - origin[2]) / cell_size;
424
425 let ix = (cx.max(0.0) as u32).min(0x1F_FFFF);
426 let iy = (cy.max(0.0) as u32).min(0x1F_FFFF);
427 let iz = (cz.max(0.0) as u32).min(0x1F_FFFF);
428 morton_code(ix, iy, iz)
429}
430
431pub fn morton_sort(aabbs: &[AabbGpu], cell_size: f32, origin: [f32; 3]) -> Vec<AabbGpu> {
435 let mut keyed: Vec<(u64, AabbGpu)> = aabbs
436 .iter()
437 .map(|a| (morton_key_for_aabb(a, cell_size, origin), *a))
438 .collect();
439 keyed.sort_by_key(|&(k, _)| k);
440 keyed.into_iter().map(|(_, a)| a).collect()
441}
442
443#[derive(Debug, Clone, Default)]
449pub struct CompactPairList {
450 pairs: Vec<(u32, u32)>,
451}
452
453impl CompactPairList {
454 pub fn new() -> Self {
456 Self::default()
457 }
458
459 pub fn insert(&mut self, a: u32, b: u32) {
461 let pair = if a < b { (a, b) } else { (b, a) };
462 if !self.pairs.contains(&pair) {
464 self.pairs.push(pair);
465 }
466 }
467
468 pub fn insert_all(&mut self, pairs: &[(u32, u32)]) {
470 for &(a, b) in pairs {
471 self.insert(a, b);
472 }
473 }
474
475 pub fn sort(&mut self) {
477 self.pairs.sort();
478 }
479
480 pub fn pairs(&self) -> &[(u32, u32)] {
482 &self.pairs
483 }
484
485 pub fn len(&self) -> usize {
487 self.pairs.len()
488 }
489
490 pub fn is_empty(&self) -> bool {
492 self.pairs.is_empty()
493 }
494
495 pub fn remove_body(&mut self, body_id: u32) {
497 self.pairs.retain(|&(a, b)| a != body_id && b != body_id);
498 }
499
500 pub fn contains(&self, a: u32, b: u32) -> bool {
502 let p = if a < b { (a, b) } else { (b, a) };
503 self.pairs.contains(&p)
504 }
505}
506
507#[derive(Debug, Clone, Copy)]
513pub struct BvhGpuNode {
514 pub aabb: AabbGpu,
516 pub left: i32,
518 pub right: i32,
520}
521
522impl BvhGpuNode {
523 pub fn internal(aabb: AabbGpu, left: i32, right: i32) -> Self {
525 Self { aabb, left, right }
526 }
527
528 pub fn leaf(aabb: AabbGpu) -> Self {
530 let id = aabb.body_id as i32;
531 Self {
532 aabb,
533 left: -(id + 1),
534 right: -(id + 1),
535 }
536 }
537
538 pub fn is_leaf(&self) -> bool {
540 self.left < 0
541 }
542}
543
544pub fn build_bvh(aabbs: &[AabbGpu]) -> Vec<BvhGpuNode> {
549 let mut nodes = Vec::new();
550 if aabbs.is_empty() {
551 return nodes;
552 }
553 let mut indices: Vec<usize> = (0..aabbs.len()).collect();
554 build_bvh_recursive(aabbs, &mut indices, &mut nodes);
555 nodes
556}
557
558fn merge_aabbs(aabbs: &[AabbGpu], indices: &[usize]) -> AabbGpu {
559 let mut min = aabbs[indices[0]].min;
560 let mut max = aabbs[indices[0]].max;
561 for &idx in &indices[1..] {
562 let a = &aabbs[idx];
563 for k in 0..3 {
564 if a.min[k] < min[k] {
565 min[k] = a.min[k];
566 }
567 if a.max[k] > max[k] {
568 max[k] = a.max[k];
569 }
570 }
571 }
572 AabbGpu {
573 min,
574 max,
575 body_id: 0,
576 }
577}
578
579fn build_bvh_recursive(
580 aabbs: &[AabbGpu],
581 indices: &mut [usize],
582 nodes: &mut Vec<BvhGpuNode>,
583) -> i32 {
584 let n = indices.len();
585 let merged = merge_aabbs(aabbs, indices);
586
587 if n == 1 {
588 let idx = nodes.len() as i32;
589 nodes.push(BvhGpuNode::leaf(aabbs[indices[0]]));
590 return idx;
591 }
592
593 let extents = [
595 merged.max[0] - merged.min[0],
596 merged.max[1] - merged.min[1],
597 merged.max[2] - merged.min[2],
598 ];
599 let axis = if extents[0] >= extents[1] && extents[0] >= extents[2] {
600 0
601 } else if extents[1] >= extents[2] {
602 1
603 } else {
604 2
605 };
606
607 indices.sort_by(|&a, &b| {
609 let ca = (aabbs[a].min[axis] + aabbs[a].max[axis]) * 0.5;
610 let cb = (aabbs[b].min[axis] + aabbs[b].max[axis]) * 0.5;
611 ca.partial_cmp(&cb).unwrap_or(std::cmp::Ordering::Equal)
612 });
613
614 let mid = n / 2;
615 let (left_idx, right_idx) = indices.split_at_mut(mid);
616
617 let node_idx = nodes.len() as i32;
619 nodes.push(BvhGpuNode {
620 aabb: merged,
621 left: 0,
622 right: 0,
623 }); let mut left_indices = left_idx.to_vec();
626 let mut right_indices = right_idx.to_vec();
627
628 let left = build_bvh_recursive(aabbs, &mut left_indices, nodes);
629 let right = build_bvh_recursive(aabbs, &mut right_indices, nodes);
630
631 nodes[node_idx as usize].left = left;
632 nodes[node_idx as usize].right = right;
633
634 node_idx
635}
636
637pub fn lbvh_query_pairs(nodes: &[BvhGpuNode]) -> Vec<(u32, u32)> {
646 if nodes.is_empty() {
647 return Vec::new();
648 }
649 let mut pairs = Vec::new();
650 let mut stack: Vec<(usize, usize)> = Vec::new();
652 stack.push((0, 0));
654
655 while let Some((a_idx, b_idx)) = stack.pop() {
656 let na = &nodes[a_idx];
657 let nb = &nodes[b_idx];
658
659 if !na.aabb.overlaps(&nb.aabb) {
660 continue;
661 }
662
663 if na.is_leaf() && nb.is_leaf() {
664 if a_idx != b_idx {
665 let id_a = na.aabb.body_id;
666 let id_b = nb.aabb.body_id;
667 let pair = if id_a < id_b {
668 (id_a, id_b)
669 } else {
670 (id_b, id_a)
671 };
672 pairs.push(pair);
673 }
674 continue;
675 }
676
677 if na.is_leaf() {
679 if nb.left >= 0 {
681 stack.push((a_idx, nb.left as usize));
682 }
683 if nb.right >= 0 {
684 stack.push((a_idx, nb.right as usize));
685 }
686 } else if nb.is_leaf() {
687 if na.left >= 0 {
689 stack.push((na.left as usize, b_idx));
690 }
691 if na.right >= 0 {
692 stack.push((na.right as usize, b_idx));
693 }
694 } else {
695 if na.left >= 0 {
697 stack.push((na.left as usize, b_idx));
698 }
699 if na.right >= 0 {
700 stack.push((na.right as usize, b_idx));
701 }
702 }
703 }
704 pairs.sort();
705 pairs.dedup();
706 pairs
707}
708
709pub fn refit_bvh(nodes: &mut Vec<BvhGpuNode>) {
719 let n = nodes.len();
721 for i in (0..n).rev() {
722 if nodes[i].is_leaf() {
723 continue; }
725 let left_idx = nodes[i].left;
726 let right_idx = nodes[i].right;
727 if left_idx < 0 || right_idx < 0 {
728 continue;
729 }
730 let l = &nodes[left_idx as usize];
731 let r = &nodes[right_idx as usize];
732 let mut min = l.aabb.min;
733 let mut max = l.aabb.max;
734 for k in 0..3 {
735 if r.aabb.min[k] < min[k] {
736 min[k] = r.aabb.min[k];
737 }
738 if r.aabb.max[k] > max[k] {
739 max[k] = r.aabb.max[k];
740 }
741 }
742 nodes[i].aabb = AabbGpu {
743 min,
744 max,
745 body_id: 0,
746 };
747 }
748}
749
750#[allow(dead_code)]
756pub fn aabb_surface_area(aabb: &AabbGpu) -> f32 {
757 let dx = aabb.max[0] - aabb.min[0];
758 let dy = aabb.max[1] - aabb.min[1];
759 let dz = aabb.max[2] - aabb.min[2];
760 2.0 * (dx * dy + dy * dz + dz * dx)
761}
762
763#[allow(dead_code)]
770pub fn bvh_sah_cost(nodes: &[BvhGpuNode], cost_traversal: f32, cost_primitive: f32) -> f32 {
771 if nodes.is_empty() {
772 return 0.0;
773 }
774 let root_sa = aabb_surface_area(&nodes[0].aabb);
775 if root_sa < 1e-20 {
776 return 0.0;
777 }
778 let mut cost = 0.0f32;
779 for node in nodes {
780 let sa = aabb_surface_area(&node.aabb);
781 if node.is_leaf() {
782 cost += sa / root_sa * cost_primitive;
783 } else {
784 cost += sa / root_sa * cost_traversal;
785 }
786 }
787 cost
788}
789
790#[allow(dead_code)]
794pub fn bvh_depth(nodes: &[BvhGpuNode]) -> usize {
795 if nodes.is_empty() {
796 return 0;
797 }
798 let mut max_depth = 0usize;
799 let mut stack: Vec<(usize, usize)> = vec![(0, 0)];
801 while let Some((idx, depth)) = stack.pop() {
802 if depth > max_depth {
803 max_depth = depth;
804 }
805 let node = &nodes[idx];
806 if !node.is_leaf() {
807 if node.left >= 0 {
808 stack.push((node.left as usize, depth + 1));
809 }
810 if node.right >= 0 {
811 stack.push((node.right as usize, depth + 1));
812 }
813 }
814 }
815 max_depth
816}
817
818#[allow(dead_code)]
820pub fn bvh_leaf_count(nodes: &[BvhGpuNode]) -> usize {
821 nodes.iter().filter(|n| n.is_leaf()).count()
822}
823
824#[allow(dead_code)]
839pub fn sap_incremental_update(
840 existing: &CompactPairList,
841 aabbs: &[AabbGpu],
842 moved_ids: &[u32],
843) -> CompactPairList {
844 let mut new_list = existing.clone();
845
846 for &id in moved_ids {
848 new_list.remove_body(id);
849 }
850
851 for &moved_id in moved_ids {
853 if let Some(moved_aabb) = aabbs.iter().find(|a| a.body_id == moved_id) {
854 for other in aabbs {
855 if other.body_id == moved_id {
856 continue;
857 }
858 if moved_aabb.overlaps(other) {
859 new_list.insert(moved_id, other.body_id);
860 }
861 }
862 }
863 }
864
865 new_list.sort();
866 new_list
867}
868
869#[allow(dead_code)]
880pub fn sah_best_split(
881 aabbs: &[AabbGpu],
882 indices: &[usize],
883 axis: usize,
884 num_bins: usize,
885) -> Option<usize> {
886 if indices.len() < 2 || num_bins < 2 {
887 return None;
888 }
889
890 let min_c = indices
892 .iter()
893 .map(|&i| 0.5 * (aabbs[i].min[axis] + aabbs[i].max[axis]))
894 .fold(f32::INFINITY, f32::min);
895 let max_c = indices
896 .iter()
897 .map(|&i| 0.5 * (aabbs[i].min[axis] + aabbs[i].max[axis]))
898 .fold(f32::NEG_INFINITY, f32::max);
899
900 if (max_c - min_c).abs() < 1e-10 {
901 return None;
902 }
903
904 let bin_width = (max_c - min_c) / num_bins as f32;
905 let mut bin_counts = vec![0usize; num_bins];
906 let mut bin_aabbs: Vec<Option<AabbGpu>> = vec![None; num_bins];
907
908 for &i in indices {
909 let c = 0.5 * (aabbs[i].min[axis] + aabbs[i].max[axis]);
910 let bin = ((c - min_c) / bin_width).floor() as usize;
911 let bin = bin.min(num_bins - 1);
912 bin_counts[bin] += 1;
913 bin_aabbs[bin] = Some(match &bin_aabbs[bin] {
914 None => aabbs[i],
915 Some(prev) => {
916 let mut merged = *prev;
917 for k in 0..3 {
918 if aabbs[i].min[k] < merged.min[k] {
919 merged.min[k] = aabbs[i].min[k];
920 }
921 if aabbs[i].max[k] > merged.max[k] {
922 merged.max[k] = aabbs[i].max[k];
923 }
924 }
925 merged
926 }
927 });
928 }
929
930 let mut best_cost = f32::INFINITY;
931 let mut best_split = None;
932
933 for split in 1..num_bins {
934 let left_count: usize = bin_counts[..split].iter().sum();
935 let right_count: usize = bin_counts[split..].iter().sum();
936 if left_count == 0 || right_count == 0 {
937 continue;
938 }
939
940 let left_sa = bin_aabbs[..split]
942 .iter()
943 .flatten()
944 .fold(None::<AabbGpu>, |acc, a| {
945 Some(match acc {
946 None => *a,
947 Some(prev) => {
948 let mut m = prev;
949 for k in 0..3 {
950 if a.min[k] < m.min[k] {
951 m.min[k] = a.min[k];
952 }
953 if a.max[k] > m.max[k] {
954 m.max[k] = a.max[k];
955 }
956 }
957 m
958 }
959 })
960 })
961 .map(|a| aabb_surface_area(&a))
962 .unwrap_or(0.0);
963
964 let right_sa = bin_aabbs[split..]
965 .iter()
966 .flatten()
967 .fold(None::<AabbGpu>, |acc, a| {
968 Some(match acc {
969 None => *a,
970 Some(prev) => {
971 let mut m = prev;
972 for k in 0..3 {
973 if a.min[k] < m.min[k] {
974 m.min[k] = a.min[k];
975 }
976 if a.max[k] > m.max[k] {
977 m.max[k] = a.max[k];
978 }
979 }
980 m
981 }
982 })
983 })
984 .map(|a| aabb_surface_area(&a))
985 .unwrap_or(0.0);
986
987 let cost = left_sa * left_count as f32 + right_sa * right_count as f32;
988 if cost < best_cost {
989 best_cost = cost;
990 best_split = Some(split);
991 }
992 }
993
994 best_split
995}
996
997#[allow(dead_code)]
1010pub fn build_lbvh(aabbs: &[AabbGpu], cell_size: f32, origin: [f32; 3]) -> Vec<BvhGpuNode> {
1011 if aabbs.is_empty() {
1012 return Vec::new();
1013 }
1014 let sorted = morton_sort(aabbs, cell_size, origin);
1016 build_bvh(&sorted)
1018}
1019
1020#[cfg(test)]
1025mod tests {
1026 use super::*;
1027
1028 #[test]
1029 fn aabb_overlap_detects_overlapping_boxes() {
1030 #[rustfmt::skip]
1032 let aabbs: Vec<f64> = vec![
1033 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, ];
1036 let mut outputs = vec![Vec::new()];
1037 AabbOverlapKernel.execute(&[&aabbs], &mut outputs, 2);
1038 assert_eq!(outputs[0], vec![0.0, 1.0]);
1039 }
1040
1041 #[test]
1042 fn aabb_overlap_rejects_non_overlapping_boxes() {
1043 #[rustfmt::skip]
1044 let aabbs: Vec<f64> = vec![
1045 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 5.0, 6.0, 5.0, 6.0, 5.0, 6.0, ];
1048 let mut outputs = vec![Vec::new()];
1049 AabbOverlapKernel.execute(&[&aabbs], &mut outputs, 2);
1050 assert!(outputs[0].is_empty());
1051 }
1052
1053 #[test]
1054 fn test_broadphase_gpu_matches_cpu() {
1055 #[rustfmt::skip]
1057 let aabbs: Vec<f64> = vec![
1058 0.0, 1.5, 0.0, 1.5, 0.0, 1.5, 1.0, 2.5, 0.0, 1.5, 0.0, 1.5, 3.0, 4.0, 0.0, 1.5, 0.0, 1.5, ];
1062
1063 let mut gpu_outputs = vec![Vec::new()];
1065 AabbOverlapKernel.execute(&[&aabbs], &mut gpu_outputs, 3);
1066
1067 let n = aabbs.len() / 6;
1069 let mut cpu_pairs: Vec<(usize, usize)> = Vec::new();
1070 for i in 0..n {
1071 for j in (i + 1)..n {
1072 let a = &aabbs[i * 6..(i + 1) * 6];
1073 let b = &aabbs[j * 6..(j + 1) * 6];
1074 let overlaps = a[0] <= b[1]
1075 && a[1] >= b[0]
1076 && a[2] <= b[3]
1077 && a[3] >= b[2]
1078 && a[4] <= b[5]
1079 && a[5] >= b[4];
1080 if overlaps {
1081 cpu_pairs.push((i, j));
1082 }
1083 }
1084 }
1085
1086 let raw = &gpu_outputs[0];
1088 assert_eq!(raw.len() % 2, 0, "GPU output length must be even");
1089 let gpu_pairs: Vec<(usize, usize)> = raw
1090 .chunks(2)
1091 .map(|c| (c[0] as usize, c[1] as usize))
1092 .collect();
1093
1094 assert_eq!(
1095 gpu_pairs, cpu_pairs,
1096 "GPU broadphase pairs do not match CPU brute-force pairs"
1097 );
1098 }
1099
1100 #[test]
1102 fn test_morton_code_correctness() {
1103 assert_eq!(morton_code(0, 0, 0), 0);
1104 assert_eq!(morton_code(1, 0, 0), 1);
1106 assert_eq!(morton_code(0, 1, 0), 2);
1108 assert_eq!(morton_code(0, 0, 1), 4);
1110 assert_eq!(morton_code(1, 1, 1), 7);
1112 }
1113
1114 #[test]
1116 fn test_sap_finds_overlap() {
1117 let a = AabbGpu::new([0.0, 0.0, 0.0], [2.0, 2.0, 2.0], 0);
1118 let b = AabbGpu::new([1.0, 1.0, 1.0], [3.0, 3.0, 3.0], 1);
1119 let c = AabbGpu::new([10.0, 10.0, 10.0], [12.0, 12.0, 12.0], 2);
1120
1121 let pairs = SortAndSweepGpu::detect_pairs(&[a, b, c]);
1122 assert!(
1123 pairs.contains(&(0, 1)),
1124 "SAP should find pair (0,1), got {pairs:?}"
1125 );
1126 assert!(!pairs.contains(&(0, 2)), "SAP should not find pair (0,2)");
1127 assert!(!pairs.contains(&(1, 2)), "SAP should not find pair (1,2)");
1128 }
1129
1130 #[test]
1132 fn test_sap_no_overlap() {
1133 let a = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0);
1134 let b = AabbGpu::new([5.0, 5.0, 5.0], [6.0, 6.0, 6.0], 1);
1135 let pairs = SortAndSweepGpu::detect_pairs(&[a, b]);
1136 assert!(
1137 pairs.is_empty(),
1138 "Should be no overlapping pairs, got {pairs:?}"
1139 );
1140 }
1141
1142 #[test]
1144 fn test_uniform_grid_pair_detection() {
1145 let a = AabbGpu::new([0.0, 0.0, 0.0], [2.0, 2.0, 2.0], 0);
1146 let b = AabbGpu::new([1.5, 1.5, 1.5], [3.5, 3.5, 3.5], 1);
1147 let c = AabbGpu::new([10.0, 10.0, 10.0], [12.0, 12.0, 12.0], 2);
1148
1149 let grid = UniformGridGpu::new(5.0, [0.0, 0.0, 0.0], [10, 10, 10]);
1150 let pairs = grid.query_pairs(&[a, b, c]);
1151
1152 assert!(
1154 pairs.contains(&(0, 1)),
1155 "Grid should find pair (0,1), got {pairs:?}"
1156 );
1157 assert!(
1158 !pairs
1159 .iter()
1160 .any(|&(x, y)| x == 0 && y == 2 || x == 2 && y == 0)
1161 );
1162 }
1163
1164 #[test]
1166 fn test_bvh_depth() {
1167 let aabbs: Vec<AabbGpu> = (0..8)
1168 .map(|i| {
1169 let x = (i * 3) as f32;
1170 AabbGpu::new([x, 0.0, 0.0], [x + 1.0, 1.0, 1.0], i)
1171 })
1172 .collect();
1173
1174 let nodes = build_bvh(&aabbs);
1175
1176 assert!(!nodes.is_empty(), "BVH should have at least one node");
1178 assert!(
1179 nodes.len() <= 2 * aabbs.len(),
1180 "BVH node count unexpected: {}",
1181 nodes.len()
1182 );
1183
1184 let root = &nodes[0];
1186 assert!(root.aabb.min[0] <= 0.0 + 1e-5, "Root min_x too large");
1187 assert!(
1188 root.aabb.max[0] >= 22.0 - 1e-5,
1189 "Root max_x too small: {}",
1190 root.aabb.max[0]
1191 );
1192 }
1193
1194 #[test]
1196 fn test_aabb_gpu_overlap_symmetric() {
1197 let a = AabbGpu::new([0.0, 0.0, 0.0], [2.0, 2.0, 2.0], 0);
1198 let b = AabbGpu::new([1.0, 1.0, 1.0], [3.0, 3.0, 3.0], 1);
1199 assert_eq!(a.overlaps(&b), b.overlaps(&a));
1200
1201 let c = AabbGpu::new([5.0, 5.0, 5.0], [6.0, 6.0, 6.0], 2);
1202 assert_eq!(a.overlaps(&c), c.overlaps(&a));
1203 assert!(!a.overlaps(&c));
1204 }
1205
1206 #[allow(clippy::nonminimal_bool)]
1209 #[test]
1210 fn test_sort_and_sweep_flat_finds_pair() {
1211 #[rustfmt::skip]
1212 let aabbs: Vec<f64> = vec![
1213 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 5.0, 6.0, 5.0, 6.0, 5.0, 6.0, ];
1217 let pairs = sort_and_sweep_flat(&aabbs);
1218 assert!(pairs.contains(&(0, 1)), "should find (0,1)");
1219 assert!(
1220 !pairs
1221 .iter()
1222 .any(|&(a, b)| a == 0 && b == 2 || a == 1 && b == 2)
1223 );
1224 }
1225
1226 #[test]
1227 fn test_sort_and_sweep_flat_empty() {
1228 let pairs = sort_and_sweep_flat(&[]);
1229 assert!(pairs.is_empty());
1230 }
1231
1232 #[test]
1233 fn test_sort_and_sweep_flat_no_overlap() {
1234 #[rustfmt::skip]
1235 let aabbs: Vec<f64> = vec![
1236 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
1237 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
1238 ];
1239 let pairs = sort_and_sweep_flat(&aabbs);
1240 assert!(pairs.is_empty());
1241 }
1242
1243 #[test]
1244 fn test_assign_to_grid_cells() {
1245 #[rustfmt::skip]
1246 let aabbs: Vec<f64> = vec![
1247 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, ];
1249 let cells = assign_to_grid_cells(&aabbs, 2.0, [0.0, 0.0, 0.0]);
1250 assert!(!cells.is_empty());
1251 assert!(cells.iter().all(|&(_, idx)| idx == 0));
1253 }
1254
1255 #[test]
1256 fn test_pairs_from_grid_assignments() {
1257 let assignments = vec![(0u64, 0usize), (0u64, 1usize)];
1259 let pairs = pairs_from_grid_assignments(&assignments);
1260 assert!(pairs.contains(&(0, 1)));
1261 }
1262
1263 #[test]
1264 fn test_pairs_from_grid_assignments_no_dup() {
1265 let assignments = vec![
1267 (0u64, 0usize),
1268 (0u64, 1usize),
1269 (1u64, 0usize),
1270 (1u64, 1usize),
1271 ];
1272 let pairs = pairs_from_grid_assignments(&assignments);
1273 let count = pairs.iter().filter(|&&p| p == (0, 1)).count();
1275 assert_eq!(count, 1);
1276 }
1277
1278 #[test]
1279 fn test_morton_sort_orders_aabbs() {
1280 let aabbs: Vec<AabbGpu> = vec![
1281 AabbGpu::new([4.0, 0.0, 0.0], [5.0, 1.0, 1.0], 0),
1282 AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 1),
1283 AabbGpu::new([2.0, 2.0, 2.0], [3.0, 3.0, 3.0], 2),
1284 ];
1285 let sorted = morton_sort(&aabbs, 1.0, [0.0, 0.0, 0.0]);
1286 assert_eq!(
1288 sorted[0].body_id, 1,
1289 "body at origin should be first in Morton order"
1290 );
1291 }
1292
1293 #[test]
1294 fn test_morton_key_reproducible() {
1295 let a = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0);
1296 let k1 = morton_key_for_aabb(&a, 1.0, [0.0, 0.0, 0.0]);
1297 let k2 = morton_key_for_aabb(&a, 1.0, [0.0, 0.0, 0.0]);
1298 assert_eq!(k1, k2);
1299 }
1300
1301 #[test]
1302 fn test_compact_pair_list_insert_dedup() {
1303 let mut list = CompactPairList::new();
1304 list.insert(0, 1);
1305 list.insert(1, 0); list.insert(0, 1); assert_eq!(list.len(), 1);
1308 }
1309
1310 #[test]
1311 fn test_compact_pair_list_contains() {
1312 let mut list = CompactPairList::new();
1313 list.insert(2, 5);
1314 assert!(list.contains(2, 5));
1315 assert!(list.contains(5, 2));
1316 assert!(!list.contains(0, 1));
1317 }
1318
1319 #[test]
1320 fn test_compact_pair_list_remove_body() {
1321 let mut list = CompactPairList::new();
1322 list.insert(0, 1);
1323 list.insert(0, 2);
1324 list.insert(1, 2);
1325 list.remove_body(0);
1326 assert!(!list.contains(0, 1));
1327 assert!(!list.contains(0, 2));
1328 assert!(list.contains(1, 2));
1329 }
1330
1331 #[test]
1332 fn test_compact_pair_list_insert_all() {
1333 let mut list = CompactPairList::new();
1334 list.insert_all(&[(0, 1), (1, 2), (0, 2)]);
1335 assert_eq!(list.len(), 3);
1336 }
1337
1338 #[test]
1339 fn test_compact_pair_list_sort() {
1340 let mut list = CompactPairList::new();
1341 list.insert(3, 4);
1342 list.insert(0, 1);
1343 list.insert(1, 2);
1344 list.sort();
1345 assert_eq!(list.pairs()[0], (0, 1));
1346 }
1347
1348 #[test]
1351 fn test_lbvh_query_pairs_finds_overlap() {
1352 let a = AabbGpu::new([0.0, 0.0, 0.0], [2.0, 2.0, 2.0], 0);
1354 let b = AabbGpu::new([1.0, 1.0, 1.0], [3.0, 3.0, 3.0], 1);
1355 let nodes = build_bvh(&[a, b]);
1356 let pairs = lbvh_query_pairs(&nodes);
1357 assert!(
1358 pairs.contains(&(0, 1)),
1359 "LBVH traversal should find (0,1): {pairs:?}"
1360 );
1361 }
1362
1363 #[test]
1364 fn test_lbvh_query_pairs_no_overlap() {
1365 let a = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0);
1366 let b = AabbGpu::new([5.0, 5.0, 5.0], [6.0, 6.0, 6.0], 1);
1367 let nodes = build_bvh(&[a, b]);
1368 let pairs = lbvh_query_pairs(&nodes);
1369 assert!(
1370 pairs.is_empty(),
1371 "Non-overlapping: should have no pairs: {pairs:?}"
1372 );
1373 }
1374
1375 #[test]
1376 fn test_lbvh_query_empty_bvh() {
1377 let pairs = lbvh_query_pairs(&[]);
1378 assert!(pairs.is_empty());
1379 }
1380
1381 #[test]
1384 fn test_refit_bvh_no_panic() {
1385 let aabbs: Vec<AabbGpu> = (0..4)
1386 .map(|i| {
1387 let x = (i * 2) as f32;
1388 AabbGpu::new([x, 0.0, 0.0], [x + 1.0, 1.0, 1.0], i)
1389 })
1390 .collect();
1391 let mut nodes = build_bvh(&aabbs);
1392 for node in nodes.iter_mut() {
1394 if node.is_leaf() {
1395 node.aabb.max[0] += 0.5;
1396 }
1397 }
1398 refit_bvh(&mut nodes);
1399 assert!(!nodes.is_empty());
1401 }
1402
1403 #[test]
1404 fn test_refit_bvh_root_encompasses_leaves() {
1405 let aabbs: Vec<AabbGpu> = vec![
1406 AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0),
1407 AabbGpu::new([10.0, 0.0, 0.0], [11.0, 1.0, 1.0], 1),
1408 ];
1409 let mut nodes = build_bvh(&aabbs);
1410 refit_bvh(&mut nodes);
1411 assert!(nodes[0].aabb.min[0] <= 0.0 + 1e-5);
1413 assert!(nodes[0].aabb.max[0] >= 11.0 - 1e-5);
1414 }
1415
1416 #[test]
1419 fn test_aabb_surface_area() {
1420 let a = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 2.0, 3.0], 0);
1421 let sa = aabb_surface_area(&a);
1422 assert!((sa - 22.0).abs() < 1e-5, "SA = {sa}");
1424 }
1425
1426 #[test]
1427 fn test_aabb_surface_area_unit_cube() {
1428 let a = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0);
1429 let sa = aabb_surface_area(&a);
1430 assert!((sa - 6.0).abs() < 1e-5, "unit cube SA = {sa}");
1431 }
1432
1433 #[test]
1434 fn test_bvh_sah_cost_positive() {
1435 let aabbs: Vec<AabbGpu> = (0..4)
1436 .map(|i| {
1437 let x = (i * 3) as f32;
1438 AabbGpu::new([x, 0.0, 0.0], [x + 2.0, 2.0, 2.0], i)
1439 })
1440 .collect();
1441 let nodes = build_bvh(&aabbs);
1442 let cost = bvh_sah_cost(&nodes, 1.0, 1.0);
1443 assert!(cost > 0.0, "SAH cost should be positive: {cost}");
1444 assert!(cost.is_finite(), "SAH cost should be finite");
1445 }
1446
1447 #[test]
1448 fn test_bvh_depth_single_leaf() {
1449 let aabbs = vec![AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0)];
1450 let nodes = build_bvh(&aabbs);
1451 let d = bvh_depth(&nodes);
1452 assert_eq!(d, 0, "single leaf depth = {d}");
1453 }
1454
1455 #[test]
1456 fn test_bvh_depth_two_leaves() {
1457 let aabbs = vec![
1458 AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0),
1459 AabbGpu::new([2.0, 0.0, 0.0], [3.0, 1.0, 1.0], 1),
1460 ];
1461 let nodes = build_bvh(&aabbs);
1462 let d = bvh_depth(&nodes);
1463 assert!(d >= 1, "two-leaf BVH depth >= 1, got {d}");
1464 }
1465
1466 #[test]
1467 fn test_bvh_leaf_count() {
1468 let aabbs: Vec<AabbGpu> = (0..8)
1469 .map(|i| {
1470 let x = (i * 2) as f32;
1471 AabbGpu::new([x, 0.0, 0.0], [x + 1.0, 1.0, 1.0], i)
1472 })
1473 .collect();
1474 let nodes = build_bvh(&aabbs);
1475 let leaves = bvh_leaf_count(&nodes);
1476 assert_eq!(leaves, 8, "Expected 8 leaves, got {leaves}");
1477 }
1478
1479 #[test]
1482 fn test_sap_incremental_removes_moved() {
1483 let a = AabbGpu::new([0.0, 0.0, 0.0], [2.0, 2.0, 2.0], 0);
1484 let b = AabbGpu::new([1.0, 1.0, 1.0], [3.0, 3.0, 3.0], 1);
1485 let c = AabbGpu::new([10.0, 10.0, 10.0], [11.0, 11.0, 11.0], 2);
1486 let initial_pairs = SortAndSweepGpu::detect_pairs(&[a, b, c]);
1487 let mut existing = CompactPairList::new();
1488 existing.insert_all(&initial_pairs);
1489
1490 let a_moved = AabbGpu::new([20.0, 20.0, 20.0], [21.0, 21.0, 21.0], 0);
1492 let updated = sap_incremental_update(&existing, &[a_moved, b, c], &[0]);
1493
1494 assert!(
1496 !updated.contains(0, 1),
1497 "pair (0,1) should be removed after move"
1498 );
1499 }
1500
1501 #[test]
1502 fn test_sap_incremental_adds_new_overlap() {
1503 let _a = AabbGpu::new([10.0, 0.0, 0.0], [11.0, 1.0, 1.0], 0);
1504 let b = AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 1);
1505 let existing = CompactPairList::new(); let a_moved = AabbGpu::new([0.5, 0.5, 0.5], [1.5, 1.5, 1.5], 0);
1509 let updated = sap_incremental_update(&existing, &[a_moved, b], &[0]);
1510 assert!(
1511 updated.contains(0, 1),
1512 "pair (0,1) should be added after move: {:?}",
1513 updated.pairs()
1514 );
1515 }
1516
1517 #[test]
1520 fn test_sah_best_split_basic() {
1521 let aabbs: Vec<AabbGpu> = (0..8)
1522 .map(|i| {
1523 let x = (i * 2) as f32;
1524 AabbGpu::new([x, 0.0, 0.0], [x + 1.5, 1.0, 1.0], i)
1525 })
1526 .collect();
1527 let indices: Vec<usize> = (0..8).collect();
1528 let result = sah_best_split(&aabbs, &indices, 0, 8);
1529 assert!(result.is_some(), "SAH split should find a valid split");
1530 }
1531
1532 #[test]
1533 fn test_sah_best_split_single_element() {
1534 let aabbs = vec![AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0)];
1535 let result = sah_best_split(&aabbs, &[0], 0, 4);
1536 assert!(result.is_none(), "Single element: no split possible");
1537 }
1538
1539 #[test]
1542 fn test_build_lbvh_nonempty() {
1543 let aabbs: Vec<AabbGpu> = (0..6)
1544 .map(|i| {
1545 let x = (i * 2) as f32;
1546 AabbGpu::new([x, 0.0, 0.0], [x + 1.0, 1.0, 1.0], i)
1547 })
1548 .collect();
1549 let nodes = build_lbvh(&aabbs, 1.0, [0.0, 0.0, 0.0]);
1550 assert!(!nodes.is_empty(), "LBVH should build non-empty node list");
1551 assert!(nodes.len() <= 2 * aabbs.len());
1552 }
1553
1554 #[test]
1555 fn test_build_lbvh_empty() {
1556 let nodes = build_lbvh(&[], 1.0, [0.0, 0.0, 0.0]);
1557 assert!(nodes.is_empty());
1558 }
1559
1560 #[test]
1561 fn test_build_lbvh_single() {
1562 let aabbs = vec![AabbGpu::new([0.0, 0.0, 0.0], [1.0, 1.0, 1.0], 0)];
1563 let nodes = build_lbvh(&aabbs, 1.0, [0.0, 0.0, 0.0]);
1564 assert_eq!(nodes.len(), 1);
1565 assert!(nodes[0].is_leaf());
1566 }
1567
1568 #[test]
1569 fn test_lbvh_vs_brute_force_pairs() {
1570 let aabbs: Vec<AabbGpu> = (0..6)
1572 .map(|i| {
1573 let x = (i as f32) * 0.8;
1574 AabbGpu::new([x, 0.0, 0.0], [x + 1.0, 1.0, 1.0], i as u32)
1575 })
1576 .collect();
1577
1578 let lbvh_nodes = build_lbvh(&aabbs, 0.5, [0.0, 0.0, 0.0]);
1579 let mut lbvh_pairs = lbvh_query_pairs(&lbvh_nodes);
1580 lbvh_pairs.sort();
1581 lbvh_pairs.dedup();
1582
1583 let mut brute: Vec<(u32, u32)> = Vec::new();
1584 for i in 0..aabbs.len() {
1585 for j in (i + 1)..aabbs.len() {
1586 if aabbs[i].overlaps(&aabbs[j]) {
1587 brute.push((aabbs[i].body_id, aabbs[j].body_id));
1588 }
1589 }
1590 }
1591 brute.sort();
1592
1593 assert_eq!(
1594 lbvh_pairs, brute,
1595 "LBVH pairs {lbvh_pairs:?} != brute force {brute:?}"
1596 );
1597 }
1598}