mnemosyne-graph-core 0.1.0

Shared graph kernels for the Mnemosyne memory substrate: force-directed simulation, R-tree viewport index, and the SIMD primitives both the native PyO3 crate and the WASM sub-crate depend on.
Documentation
//! Barnes-Hut 3D force simulation internals — Phase 224 Wave 2.
//!
//! Parallel to the 2D implementation in [`crate::force`], this module
//! hosts the 3D octree (`OctNode`) and the per-tick force-accumulation
//! helpers that `Simulation::tick_3d` calls. The public `Simulation`
//! API lives in `force.rs`; this file keeps the 3D tree mechanics off
//! to the side so the 2D hot path stays readable.
//!
//! ## Tree shape
//!
//! Same flat-arena pattern as the quadtree: every node is a
//! self-contained `OctNode` stored in a `Vec<OctNode>`, children are
//! referenced by index with `u32::MAX` as the "no child" sentinel.
//! The split count is the only structural change — 8 children per
//! internal node instead of 4. The mass / centre-of-mass aggregation,
//! the one-body-leaf split-on-insert trick, and the
//! `size^2 < theta^2 * dist^2` Barnes-Hut approximation check are
//! all identical in shape.
//!
//! ## SIMD
//!
//! `graph_core::util::dot_simd` is the AVX2 fast-path for wide-float
//! dot products. A 3-float vector is still far below the
//! break-even point for that kernel (one FMA is enough), so the
//! per-pair force computation stays scalar. The fused center-gravity
//! reduction over all positions at once is the only 3D-specific place
//! where `dot_simd` would pay off (batched self-dot over the 3D
//! positions array flattened to `&[f32]`). That optimisation is
//! tracked as future work — Wave 2's priority is correctness and
//! parity with the 2D path.

use glam::Vec3;

use crate::util::dot_simd;

/// A single octree node. Same shape as the quadtree's `QuadNode` but
/// with 8-way children and 3D extents. See the 2D implementation for
/// an explanation of the three leaf/internal states.
#[derive(Debug, Clone, Copy)]
pub(crate) struct OctNode {
    min: Vec3,
    max: Vec3,
    mass: f32,
    com: Vec3,
    body: Option<u32>,
    children: [u32; 8], // u32::MAX sentinel for "no child"
    has_children: bool,
}

impl OctNode {
    fn empty(min: Vec3, max: Vec3) -> Self {
        Self {
            min,
            max,
            mass: 0.0,
            com: Vec3::ZERO,
            body: None,
            children: [u32::MAX; 8],
            has_children: false,
        }
    }

    /// Read-only accessor for tests and debugging. Crate-internal.
    #[cfg(test)]
    pub(crate) fn mass(&self) -> f32 {
        self.mass
    }

    /// Read-only accessor for tests and debugging. Crate-internal.
    #[cfg(test)]
    pub(crate) fn com(&self) -> Vec3 {
        self.com
    }

    #[inline]
    fn size(&self) -> f32 {
        (self.max - self.min).max_element()
    }

    #[inline]
    fn center(&self) -> Vec3 {
        (self.min + self.max) * 0.5
    }

    /// Return 0..8 based on which octant `p` falls into relative to
    /// this node's centre. Bit 0 = east (x >= cx), bit 1 = north
    /// (y >= cy), bit 2 = up (z >= cz).
    #[inline]
    fn octant_of(&self, p: Vec3) -> usize {
        let c = self.center();
        let east = (p.x >= c.x) as usize;
        let north = (p.y >= c.y) as usize;
        let up = (p.z >= c.z) as usize;
        (up << 2) | (north << 1) | east
    }

    #[inline]
    fn child_bounds(&self, q: usize) -> (Vec3, Vec3) {
        let c = self.center();
        let (x_min, x_max) = if q & 1 == 1 {
            (c.x, self.max.x)
        } else {
            (self.min.x, c.x)
        };
        let (y_min, y_max) = if q & 2 == 2 {
            (c.y, self.max.y)
        } else {
            (self.min.y, c.y)
        };
        let (z_min, z_max) = if q & 4 == 4 {
            (c.z, self.max.z)
        } else {
            (self.min.z, c.z)
        };
        (
            Vec3::new(x_min, y_min, z_min),
            Vec3::new(x_max, y_max, z_max),
        )
    }
}

/// Build a flat-arena Barnes-Hut octree covering `positions`. The
/// `min_cell_size` threshold is the same one the 2D quadtree uses —
/// below it coincident bodies accumulate into a single leaf instead
/// of splitting further, which keeps pathological inputs from
/// blowing up the arena.
pub(crate) fn build_octree(arena: &mut Vec<OctNode>, positions: &[Vec3], min_cell_size: f32) {
    arena.clear();
    let n = positions.len();
    if n == 0 {
        return;
    }

    let mut min = positions[0];
    let mut max = positions[0];
    for p in &positions[1..] {
        min = min.min(*p);
        max = max.max(*p);
    }
    // Cube up the bounding box so octants stay cubic. Guard against
    // degenerate (all-coincident) inputs.
    let extent = (max - min).max_element().max(min_cell_size);
    let center = (min + max) * 0.5;
    let half = Vec3::splat(extent * 0.5);
    let min = center - half;
    let max = center + half;

    arena.push(OctNode::empty(min, max));
    for (i, p) in positions.iter().enumerate() {
        insert(arena, 0, i as u32, *p, min_cell_size);
    }
    finalise(arena, 0);
}

fn insert(tree: &mut Vec<OctNode>, idx: usize, body: u32, pos: Vec3, min_cell_size: f32) {
    // Case 1: empty leaf.
    if !tree[idx].has_children && tree[idx].body.is_none() && tree[idx].mass == 0.0 {
        tree[idx].body = Some(body);
        tree[idx].com = pos;
        tree[idx].mass = 1.0;
        return;
    }

    // Case 2: one-body leaf. Split if we still have room.
    if !tree[idx].has_children {
        if tree[idx].size() <= min_cell_size {
            let new_mass = tree[idx].mass + 1.0;
            tree[idx].com = (tree[idx].com * tree[idx].mass + pos) / new_mass;
            tree[idx].mass = new_mass;
            return;
        }
        let existing = tree[idx].body.take().expect("leaf without a body");
        let existing_pos = tree[idx].com;
        tree[idx].mass = 0.0;
        tree[idx].com = Vec3::ZERO;
        tree[idx].has_children = true;

        let q_existing = tree[idx].octant_of(existing_pos);
        let c_existing = create_or_get_child(tree, idx, q_existing);
        insert(tree, c_existing, existing, existing_pos, min_cell_size);

        let q_new = tree[idx].octant_of(pos);
        let c_new = create_or_get_child(tree, idx, q_new);
        insert(tree, c_new, body, pos, min_cell_size);
        return;
    }

    // Case 3: internal node.
    let q = tree[idx].octant_of(pos);
    let c = create_or_get_child(tree, idx, q);
    insert(tree, c, body, pos, min_cell_size);
}

fn create_or_get_child(tree: &mut Vec<OctNode>, parent: usize, octant: usize) -> usize {
    let existing = tree[parent].children[octant];
    if existing != u32::MAX {
        return existing as usize;
    }
    let (cmin, cmax) = tree[parent].child_bounds(octant);
    let idx = tree.len();
    tree.push(OctNode::empty(cmin, cmax));
    tree[parent].children[octant] = idx as u32;
    idx
}

fn finalise(tree: &mut [OctNode], idx: usize) {
    if !tree[idx].has_children {
        return;
    }
    let children = tree[idx].children;
    let mut mass = 0.0;
    let mut com = Vec3::ZERO;
    for &c in &children {
        if c == u32::MAX {
            continue;
        }
        finalise(tree, c as usize);
        let child = tree[c as usize];
        mass += child.mass;
        com += child.com * child.mass;
    }
    if mass > 0.0 {
        com /= mass;
    }
    tree[idx].mass = mass;
    tree[idx].com = com;
}

/// Walk the octree and accumulate repulsion on the body at
/// `target_pos`. Same Barnes-Hut approximation as the 2D path — when
/// `size^2 < theta^2 * distance^2` the subtree is collapsed to its
/// centre of mass.
pub(crate) fn accumulate_repulsion_3d(
    tree: &[OctNode],
    idx: usize,
    target_pos: Vec3,
    target_idx: u32,
    theta2: f32,
    repulsion: f32,
) -> Vec3 {
    let node = &tree[idx];
    if node.mass <= 0.0 {
        return Vec3::ZERO;
    }

    if !node.has_children {
        if let Some(body) = node.body {
            if body == target_idx {
                let residual_mass = node.mass - 1.0;
                if residual_mass <= 0.0 {
                    return Vec3::ZERO;
                }
                return pair_force_3d(target_pos, node.com, residual_mass, repulsion);
            }
        }
        return pair_force_3d(target_pos, node.com, node.mass, repulsion);
    }

    let delta = node.com - target_pos;
    let dist2 = delta.length_squared();
    let size = node.size();
    if size * size < theta2 * dist2 {
        return pair_force_3d(target_pos, node.com, node.mass, repulsion);
    }

    let mut total = Vec3::ZERO;
    for &c in &node.children {
        if c == u32::MAX {
            continue;
        }
        total +=
            accumulate_repulsion_3d(tree, c as usize, target_pos, target_idx, theta2, repulsion);
    }
    total
}

/// Coulomb-like pairwise repulsion in 3D. Same softening and sign
/// convention as the 2D `pair_force`.
#[inline]
fn pair_force_3d(from: Vec3, to: Vec3, mass: f32, repulsion: f32) -> Vec3 {
    let delta = to - from;
    let dist2 = delta.length_squared() + 0.01;
    let dist = dist2.sqrt();
    let dir = delta / dist;
    let magnitude = repulsion * mass / dist2;
    dir * magnitude
}

/// Compute the sum of squared norms over a dense 3D position slice.
///
/// Batched via the shared `dot_simd` primitive — treats the `&[Vec3]`
/// as a flat `&[f32]` of length `3 * positions.len()` and runs
/// self-dot in one pass. Exposed for Wave 2's center-gravity batched
/// reduction (see docs above); wired into the callers in a follow-up.
#[allow(dead_code)]
pub(crate) fn sum_positions_sqnorm(positions: &[Vec3]) -> f32 {
    if positions.is_empty() {
        return 0.0;
    }
    // SAFETY: `glam::Vec3` is `#[repr(C)]` with three `f32` fields and
    // 4-byte alignment. Reinterpreting `&[Vec3]` as a `&[f32]` of
    // length `3 * n` matches the layout exactly.
    let flat: &[f32] = unsafe {
        std::slice::from_raw_parts(positions.as_ptr() as *const f32, positions.len() * 3)
    };
    dot_simd(flat, flat)
}