Skip to main content

graph_core/
force_3d.rs

1//! Barnes-Hut 3D force simulation internals — Phase 224 Wave 2.
2//!
3//! Parallel to the 2D implementation in [`crate::force`], this module
4//! hosts the 3D octree (`OctNode`) and the per-tick force-accumulation
5//! helpers that `Simulation::tick_3d` calls. The public `Simulation`
6//! API lives in `force.rs`; this file keeps the 3D tree mechanics off
7//! to the side so the 2D hot path stays readable.
8//!
9//! ## Tree shape
10//!
11//! Same flat-arena pattern as the quadtree: every node is a
12//! self-contained `OctNode` stored in a `Vec<OctNode>`, children are
13//! referenced by index with `u32::MAX` as the "no child" sentinel.
14//! The split count is the only structural change — 8 children per
15//! internal node instead of 4. The mass / centre-of-mass aggregation,
16//! the one-body-leaf split-on-insert trick, and the
17//! `size^2 < theta^2 * dist^2` Barnes-Hut approximation check are
18//! all identical in shape.
19//!
20//! ## SIMD
21//!
22//! `graph_core::util::dot_simd` is the AVX2 fast-path for wide-float
23//! dot products. A 3-float vector is still far below the
24//! break-even point for that kernel (one FMA is enough), so the
25//! per-pair force computation stays scalar. The fused center-gravity
26//! reduction over all positions at once is the only 3D-specific place
27//! where `dot_simd` would pay off (batched self-dot over the 3D
28//! positions array flattened to `&[f32]`). That optimisation is
29//! tracked as future work — Wave 2's priority is correctness and
30//! parity with the 2D path.
31
32use glam::Vec3;
33
34use crate::util::dot_simd;
35
36/// A single octree node. Same shape as the quadtree's `QuadNode` but
37/// with 8-way children and 3D extents. See the 2D implementation for
38/// an explanation of the three leaf/internal states.
39#[derive(Debug, Clone, Copy)]
40pub(crate) struct OctNode {
41    min: Vec3,
42    max: Vec3,
43    mass: f32,
44    com: Vec3,
45    body: Option<u32>,
46    children: [u32; 8], // u32::MAX sentinel for "no child"
47    has_children: bool,
48}
49
50impl OctNode {
51    fn empty(min: Vec3, max: Vec3) -> Self {
52        Self {
53            min,
54            max,
55            mass: 0.0,
56            com: Vec3::ZERO,
57            body: None,
58            children: [u32::MAX; 8],
59            has_children: false,
60        }
61    }
62
63    /// Read-only accessor for tests and debugging. Crate-internal.
64    #[cfg(test)]
65    pub(crate) fn mass(&self) -> f32 {
66        self.mass
67    }
68
69    /// Read-only accessor for tests and debugging. Crate-internal.
70    #[cfg(test)]
71    pub(crate) fn com(&self) -> Vec3 {
72        self.com
73    }
74
75    #[inline]
76    fn size(&self) -> f32 {
77        (self.max - self.min).max_element()
78    }
79
80    #[inline]
81    fn center(&self) -> Vec3 {
82        (self.min + self.max) * 0.5
83    }
84
85    /// Return 0..8 based on which octant `p` falls into relative to
86    /// this node's centre. Bit 0 = east (x >= cx), bit 1 = north
87    /// (y >= cy), bit 2 = up (z >= cz).
88    #[inline]
89    fn octant_of(&self, p: Vec3) -> usize {
90        let c = self.center();
91        let east = (p.x >= c.x) as usize;
92        let north = (p.y >= c.y) as usize;
93        let up = (p.z >= c.z) as usize;
94        (up << 2) | (north << 1) | east
95    }
96
97    #[inline]
98    fn child_bounds(&self, q: usize) -> (Vec3, Vec3) {
99        let c = self.center();
100        let (x_min, x_max) = if q & 1 == 1 {
101            (c.x, self.max.x)
102        } else {
103            (self.min.x, c.x)
104        };
105        let (y_min, y_max) = if q & 2 == 2 {
106            (c.y, self.max.y)
107        } else {
108            (self.min.y, c.y)
109        };
110        let (z_min, z_max) = if q & 4 == 4 {
111            (c.z, self.max.z)
112        } else {
113            (self.min.z, c.z)
114        };
115        (
116            Vec3::new(x_min, y_min, z_min),
117            Vec3::new(x_max, y_max, z_max),
118        )
119    }
120}
121
122/// Build a flat-arena Barnes-Hut octree covering `positions`. The
123/// `min_cell_size` threshold is the same one the 2D quadtree uses —
124/// below it coincident bodies accumulate into a single leaf instead
125/// of splitting further, which keeps pathological inputs from
126/// blowing up the arena.
127pub(crate) fn build_octree(arena: &mut Vec<OctNode>, positions: &[Vec3], min_cell_size: f32) {
128    arena.clear();
129    let n = positions.len();
130    if n == 0 {
131        return;
132    }
133
134    let mut min = positions[0];
135    let mut max = positions[0];
136    for p in &positions[1..] {
137        min = min.min(*p);
138        max = max.max(*p);
139    }
140    // Cube up the bounding box so octants stay cubic. Guard against
141    // degenerate (all-coincident) inputs.
142    let extent = (max - min).max_element().max(min_cell_size);
143    let center = (min + max) * 0.5;
144    let half = Vec3::splat(extent * 0.5);
145    let min = center - half;
146    let max = center + half;
147
148    arena.push(OctNode::empty(min, max));
149    for (i, p) in positions.iter().enumerate() {
150        insert(arena, 0, i as u32, *p, min_cell_size);
151    }
152    finalise(arena, 0);
153}
154
155fn insert(tree: &mut Vec<OctNode>, idx: usize, body: u32, pos: Vec3, min_cell_size: f32) {
156    // Case 1: empty leaf.
157    if !tree[idx].has_children && tree[idx].body.is_none() && tree[idx].mass == 0.0 {
158        tree[idx].body = Some(body);
159        tree[idx].com = pos;
160        tree[idx].mass = 1.0;
161        return;
162    }
163
164    // Case 2: one-body leaf. Split if we still have room.
165    if !tree[idx].has_children {
166        if tree[idx].size() <= min_cell_size {
167            let new_mass = tree[idx].mass + 1.0;
168            tree[idx].com = (tree[idx].com * tree[idx].mass + pos) / new_mass;
169            tree[idx].mass = new_mass;
170            return;
171        }
172        let existing = tree[idx].body.take().expect("leaf without a body");
173        let existing_pos = tree[idx].com;
174        tree[idx].mass = 0.0;
175        tree[idx].com = Vec3::ZERO;
176        tree[idx].has_children = true;
177
178        let q_existing = tree[idx].octant_of(existing_pos);
179        let c_existing = create_or_get_child(tree, idx, q_existing);
180        insert(tree, c_existing, existing, existing_pos, min_cell_size);
181
182        let q_new = tree[idx].octant_of(pos);
183        let c_new = create_or_get_child(tree, idx, q_new);
184        insert(tree, c_new, body, pos, min_cell_size);
185        return;
186    }
187
188    // Case 3: internal node.
189    let q = tree[idx].octant_of(pos);
190    let c = create_or_get_child(tree, idx, q);
191    insert(tree, c, body, pos, min_cell_size);
192}
193
194fn create_or_get_child(tree: &mut Vec<OctNode>, parent: usize, octant: usize) -> usize {
195    let existing = tree[parent].children[octant];
196    if existing != u32::MAX {
197        return existing as usize;
198    }
199    let (cmin, cmax) = tree[parent].child_bounds(octant);
200    let idx = tree.len();
201    tree.push(OctNode::empty(cmin, cmax));
202    tree[parent].children[octant] = idx as u32;
203    idx
204}
205
206fn finalise(tree: &mut [OctNode], idx: usize) {
207    if !tree[idx].has_children {
208        return;
209    }
210    let children = tree[idx].children;
211    let mut mass = 0.0;
212    let mut com = Vec3::ZERO;
213    for &c in &children {
214        if c == u32::MAX {
215            continue;
216        }
217        finalise(tree, c as usize);
218        let child = tree[c as usize];
219        mass += child.mass;
220        com += child.com * child.mass;
221    }
222    if mass > 0.0 {
223        com /= mass;
224    }
225    tree[idx].mass = mass;
226    tree[idx].com = com;
227}
228
229/// Walk the octree and accumulate repulsion on the body at
230/// `target_pos`. Same Barnes-Hut approximation as the 2D path — when
231/// `size^2 < theta^2 * distance^2` the subtree is collapsed to its
232/// centre of mass.
233pub(crate) fn accumulate_repulsion_3d(
234    tree: &[OctNode],
235    idx: usize,
236    target_pos: Vec3,
237    target_idx: u32,
238    theta2: f32,
239    repulsion: f32,
240) -> Vec3 {
241    let node = &tree[idx];
242    if node.mass <= 0.0 {
243        return Vec3::ZERO;
244    }
245
246    if !node.has_children {
247        if let Some(body) = node.body {
248            if body == target_idx {
249                let residual_mass = node.mass - 1.0;
250                if residual_mass <= 0.0 {
251                    return Vec3::ZERO;
252                }
253                return pair_force_3d(target_pos, node.com, residual_mass, repulsion);
254            }
255        }
256        return pair_force_3d(target_pos, node.com, node.mass, repulsion);
257    }
258
259    let delta = node.com - target_pos;
260    let dist2 = delta.length_squared();
261    let size = node.size();
262    if size * size < theta2 * dist2 {
263        return pair_force_3d(target_pos, node.com, node.mass, repulsion);
264    }
265
266    let mut total = Vec3::ZERO;
267    for &c in &node.children {
268        if c == u32::MAX {
269            continue;
270        }
271        total +=
272            accumulate_repulsion_3d(tree, c as usize, target_pos, target_idx, theta2, repulsion);
273    }
274    total
275}
276
277/// Coulomb-like pairwise repulsion in 3D. Same softening and sign
278/// convention as the 2D `pair_force`.
279#[inline]
280fn pair_force_3d(from: Vec3, to: Vec3, mass: f32, repulsion: f32) -> Vec3 {
281    let delta = to - from;
282    let dist2 = delta.length_squared() + 0.01;
283    let dist = dist2.sqrt();
284    let dir = delta / dist;
285    let magnitude = repulsion * mass / dist2;
286    dir * magnitude
287}
288
289/// Compute the sum of squared norms over a dense 3D position slice.
290///
291/// Batched via the shared `dot_simd` primitive — treats the `&[Vec3]`
292/// as a flat `&[f32]` of length `3 * positions.len()` and runs
293/// self-dot in one pass. Exposed for Wave 2's center-gravity batched
294/// reduction (see docs above); wired into the callers in a follow-up.
295#[allow(dead_code)]
296pub(crate) fn sum_positions_sqnorm(positions: &[Vec3]) -> f32 {
297    if positions.is_empty() {
298        return 0.0;
299    }
300    // SAFETY: `glam::Vec3` is `#[repr(C)]` with three `f32` fields and
301    // 4-byte alignment. Reinterpreting `&[Vec3]` as a `&[f32]` of
302    // length `3 * n` matches the layout exactly.
303    let flat: &[f32] = unsafe {
304        std::slice::from_raw_parts(positions.as_ptr() as *const f32, positions.len() * 3)
305    };
306    dot_simd(flat, flat)
307}