// rustsim-crowd 0.0.1

//! Social Force Model (Helbing & Molnár 1995; Helbing, Farkas & Vicsek 2000).
//!
//! Each pedestrian experiences a sum of three forces:
//!
//! - a driving force pulling it toward its destination at its desired speed,
//! - a repulsive social force from every other pedestrian,
//! - a repulsive force from every static obstacle (wall segment).
//!
//! The resulting acceleration is integrated with **semi-implicit
//! (symplectic) Euler**: each tick first updates velocity from the
//! current acceleration, then advances position using the *new*
//! velocity (`v ← v + a·dt; p ← p + v·dt`). This is the classic
//! stability fix for stiff pair interactions — it preserves energy
//! far better than fully explicit Euler at identical `dt` and is
//! the same integration scheme used by JuPedSim and PedSim for the
//! Social Force family. Combined with the `Params::max_accel` cap
//! and the `dt · max_accel ≤ max_speed` CFL check in
//! [`Params::validate`], this keeps the integrator stable at the
//! default 30 Hz timestep even under panic-flow parameterisation.
//! The speed is clipped to `params.max_speed` after the velocity
//! update, before the position advance.
//!
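//! This implementation follows the exponential ("psychological")
//! repulsion term of Eq. 1–3 in Helbing, Farkas & Vicsek (2000),
//! "Simulating dynamical features of escape panic", *Nature* 407,
//! 487–490. The repulsion implemented here is isotropic (circular): it
//! acts along the line joining the two centres and depends only on
//! their distance, unlike the elliptical, heading-dependent shape of
//! Helbing & Molnár (1995). The contact terms of the 2000 paper (body
//! compression and sliding friction) are not part of the force sum.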
//!
//! # References
//!
//! - Helbing, D., & Molnár, P. (1995). "Social force model for pedestrian
//!   dynamics". *Physical Review E*, 51(5), 4282–4286.
//! - Helbing, D., Farkas, I., & Vicsek, T. (2000). "Simulating dynamical
//!   features of escape panic". *Nature*, 407(6803), 487–490.

use crate::broadphase::{NeighborGrid, Scratch};
use crate::common::{add, clamp_speed, closest_point_on_segment, norm, scale, sub};
use crate::common::{Pedestrian, PedestrianModel, Vec2, WallSegment};

/// Parameters for the Social Force model.
///
/// Defaults are taken from Helbing, Farkas & Vicsek (2000) Table 1.
///
/// All fields are plain `f64` values; [`Params::validate`] checks the
/// sign constraints and the `dt * max_accel <= max_speed` bound for a
/// given timestep.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Params {
    /// Relaxation time toward the desired velocity (s).
    pub tau: f64,
    /// Interaction strength between pedestrians (N).
    pub a_ped: f64,
    /// Interaction range between pedestrians (m).
    pub b_ped: f64,
    /// Interaction strength with walls (N).
    pub a_wall: f64,
    /// Interaction range with walls (m).
    pub b_wall: f64,
    /// Pedestrian mass (kg).
    pub mass: f64,
    /// Hard upper bound on speed after integration (m/s).
    pub max_speed: f64,
    /// Arrival radius (m). Inside this distance the desired speed
    /// tapers linearly to zero so the agent decelerates into its
    /// destination instead of overshooting and oscillating. Set to
    /// `0.0` to disable the taper. Default: 0.3 m.
    pub arrival_radius: f64,
    /// Hard upper bound on acceleration magnitude (m/s²).
    ///
    /// The Helbing exponential repulsion is numerically stiff: at a
    /// light body overlap of 0.1 m the pair force with default `a_ped
    /// = 2000 N` and `b_ped = 0.08 m` is already
    /// `2000 * exp(0.1 / 0.08)` ≈ 7.0 kN, i.e. ~87 m/s² for an 80 kg
    /// pedestrian. Explicit integration at `dt = 0.05 s` would push
    /// the velocity by ~4.4 m/s in a single tick, relying entirely on
    /// [`max_speed`](Self::max_speed) to keep the trajectory physical.
    /// Clamping `|a| ≤ max_accel` before integration gives a
    /// well-behaved CFL-like bound: the default of 20 m/s² (≈ 2 g) is
    /// enough headroom for crowded panic flows but low enough that
    /// `dt * max_accel ≤ max_speed` at typical simulation rates.
    /// JuPedSim and PedSim apply the same cap; this is a standard
    /// production hardening for SFM.
    pub max_accel: f64,
}

impl Default for Params {
    fn default() -> Self {
        Self {
            tau: 0.5,
            a_ped: 2000.0,
            b_ped: 0.08,
            a_wall: 2000.0,
            b_wall: 0.08,
            mass: 80.0,
            max_speed: 2.5,
            arrival_radius: 0.3,
            max_accel: 20.0,
        }
    }
}

impl Params {
    /// Check this parameter set for use with timestep `dt`.
    ///
    /// The checks run in a fixed order and the first failure is
    /// returned as a [`CrowdError`](crate::error::CrowdError):
    ///
    /// 1. `dt` itself, via `require_dt`;
    /// 2. `tau`, `b_ped`, `b_wall`, `mass`, `max_speed`, `max_accel`,
    ///    via `require_positive`;
    /// 3. `a_ped`, `a_wall`, `arrival_radius`, via `require_nonneg`;
    /// 4. the CFL-like bound `dt * max_accel <= max_speed`, reported as
    ///    `CrowdError::CflViolation` together with the largest `dt`
    ///    that would satisfy it (`max_speed / max_accel`).
    ///
    /// Returns `Ok(())` when every check passes. Cheap: a handful of
    /// comparisons, no allocations.
    pub fn validate(&self, dt: f64) -> Result<(), crate::error::CrowdError> {
        use crate::error::{require_dt, require_nonneg, require_positive, CrowdError};
        const M: &str = "SocialForce";

        require_dt(M, dt)?;

        // Order matters: callers see the *first* offending field.
        let strictly_positive = [
            ("tau", self.tau),
            ("b_ped", self.b_ped),
            ("b_wall", self.b_wall),
            ("mass", self.mass),
            ("max_speed", self.max_speed),
            ("max_accel", self.max_accel),
        ];
        for (field, value) in strictly_positive {
            require_positive(M, field, value)?;
        }

        let non_negative = [
            ("a_ped", self.a_ped),
            ("a_wall", self.a_wall),
            ("arrival_radius", self.arrival_radius),
        ];
        for (field, value) in non_negative {
            require_nonneg(M, field, value)?;
        }

        // Largest velocity change a single tick can apply after the
        // acceleration cap; it must not exceed the speed limit.
        let product = dt * self.max_accel;
        if product > self.max_speed {
            Err(CrowdError::CflViolation {
                model: M,
                product,
                max_speed: self.max_speed,
                max_dt: self.max_speed / self.max_accel,
            })
        } else {
            Ok(())
        }
    }
}

/// Unit marker type implementing [`PedestrianModel`] for Social Force.
#[derive(Debug, Clone, Copy, Default)]
pub struct SocialForce;

impl PedestrianModel for SocialForce {
    type Params = Params;

    fn name(&self) -> &'static str {
        "Social Force"
    }

    fn step(&self, peds: &mut [Pedestrian], walls: &[WallSegment], params: &Params, dt: f64) {
        #[allow(deprecated)]
        step(peds, walls, params, dt);
    }
}

/// Free-function step for callers that do not need trait dispatch.
///
/// **Deprecated.** This is the O(n²) reference path retained for
/// numerical comparisons and CPU ↔ CUDA equivalence tests. Production
/// callers should use [`step_scratch`] (zero-allocation broadphase
/// hot path) or [`step_with_grid`] (broadphase, allocates per call).
/// Keeping `step` for parity is fine; routing fresh code through it
/// is not, because at N ≥ ~1 000 it scales as O(n²) with
/// per-tick heap allocation. See `docs/rustsim-crowd.md` P1-7.
#[deprecated(
    since = "0.0.3",
    note = "O(n²) reference path with per-tick heap allocation; use \
            `step_scratch` (zero-alloc) or `step_with_grid` (broadphase) \
            instead. See docs/rustsim-crowd.md P1-7."
)]
#[allow(clippy::needless_range_loop)]
pub fn step(peds: &mut [Pedestrian], walls: &[WallSegment], params: &Params, dt: f64) {
    let n = peds.len();
    // Compute all accelerations first so pairwise forces see the old state.
    let mut accels = vec![[0.0f64; 2]; n];

    for i in 0..n {
        let p = &peds[i];
        let mut f = driving_force(p, params);

        for j in 0..n {
            if i == j {
                continue;
            }
            let q = &peds[j];
            let f_ij = ped_repulsion(p, q, params);
            f = add(f, f_ij);
        }

        for w in walls {
            let f_iw = wall_repulsion(p, w, params);
            f = add(f, f_iw);
        }

        // a = F / m, clamped to `max_accel` for Euler stability.
        accels[i] = cap_accel(scale(f, 1.0 / params.mass), params.max_accel);
    }

    // Integrate.
    for (p, a) in peds.iter_mut().zip(accels.iter()) {
        p.vel = add(p.vel, scale(*a, dt));
        p.vel = clamp_speed(p.vel, params.max_speed);
        p.pos = add(p.pos, scale(p.vel, dt));
    }
}

/// Recommended neighbour cutoff radius for grid queries (metres).
///
/// Returns `8 * b_ped + 1.0`.
///
/// At a centre distance of `r_sum + 8 * b_ped` the pairwise repulsion
/// has decayed to `a_ped * e^-8 ≈ 3.4e-4 * a_ped` (about 0.67 N at the
/// default `a_ped = 2000 N`). `Params` carries no radii, so the fixed
/// 1 m buffer stands in for `r_sum` plus extra margin: for two agents
/// of radius 0.25 m at default parameters the force at the returned
/// cutoff is roughly `2000 * e^-14.25 ≈ 1.3 mN`. The `e^-8` bound only
/// holds while the summed radii stay at or below the 1 m buffer.
#[inline]
pub fn neighbor_cutoff(params: &Params) -> f64 {
    /// Number of interaction ranges (`b_ped`) kept inside the cutoff.
    const DECAY_LENGTHS: f64 = 8.0;
    /// Fixed allowance added on top of the decay distance (m).
    const BUFFER_M: f64 = 1.0;

    DECAY_LENGTHS * params.b_ped + BUFFER_M
}

/// Grid-accelerated step variant. Semantically equivalent to [`step`]
/// up to numerical floating-point noise for interaction pairs inside
/// `neighbor_cutoff(params)`; pairs outside that radius are pruned
/// because their contribution is below 1 mN at default parameters.
///
/// The caller owns `grid`; rebuild it once per tick with
/// [`NeighborGrid::rebuild`] using the *current* pedestrian positions.
///
/// Use this variant for populations above ~64 agents. Below that
/// threshold the grid's setup cost exceeds the O(n²) savings and
/// [`step`] is faster.
#[allow(clippy::needless_range_loop)]
pub fn step_with_grid(
    peds: &mut [Pedestrian],
    walls: &[WallSegment],
    params: &Params,
    dt: f64,
    grid: &NeighborGrid,
) {
    let n = peds.len();
    let cutoff = neighbor_cutoff(params);
    let mut accels = vec![[0.0f64; 2]; n];

    for i in 0..n {
        let p = &peds[i];
        let mut f = driving_force(p, params);

        grid.for_each_neighbor(i, cutoff, peds, |_j, q| {
            f = add(f, ped_repulsion(p, q, params));
        });

        for w in walls {
            f = add(f, wall_repulsion(p, w, params));
        }

        accels[i] = cap_accel(scale(f, 1.0 / params.mass), params.max_accel);
    }

    for (p, a) in peds.iter_mut().zip(accels.iter()) {
        p.vel = add(p.vel, scale(*a, dt));
        p.vel = clamp_speed(p.vel, params.max_speed);
        p.pos = add(p.pos, scale(p.vel, dt));
    }
}

/// One-tick update that works entirely inside a caller-owned
/// [`Scratch`]. Reuses `scratch.buf` and rebuilds `scratch.grid`
/// against `peds`, then runs the same math as [`step_with_grid`]
/// without any per-tick allocation.
///
/// This is the **hot-path variant**: allocate one [`Scratch`] per
/// simulation and call `step_scratch` every tick. Allocation-sensitive
/// callers (ECS integration, real-time 30–60 Hz loops) should prefer
/// this over [`step`] / [`step_with_grid`].
pub fn step_scratch(
    peds: &mut [Pedestrian],
    walls: &[WallSegment],
    params: &Params,
    dt: f64,
    scratch: &mut Scratch,
) {
    let cutoff = neighbor_cutoff(params);
    let inv_mass = 1.0 / params.mass;

    scratch.prepare(peds);
    let (accels, grid) = scratch.split();

    // Force pass over a read-only view of the start-of-tick state.
    let agents: &[Pedestrian] = peds;
    for (i, p) in agents.iter().enumerate() {
        let mut total = driving_force(p, params);
        grid.for_each_neighbor(i, cutoff, agents, |_j, q| {
            total = add(total, ped_repulsion(p, q, params));
        });
        total = walls
            .iter()
            .fold(total, |acc, w| add(acc, wall_repulsion(p, w, params)));
        accels[i] = cap_accel(scale(total, inv_mass), params.max_accel);
    }

    // Integration pass: velocity first, then position from the new velocity.
    for (p, a) in peds.iter_mut().zip(accels.iter()) {
        p.vel = clamp_speed(add(p.vel, scale(*a, dt)), params.max_speed);
        p.pos = add(p.pos, scale(p.vel, dt));
    }
}

/// Rayon-parallel drop-in replacement for [`step_scratch`].
///
/// Semantically **bit-exact** with [`step_scratch`] on the same
/// inputs: each rayon worker writes only to its own acceleration slot
/// from an immutable view of the pedestrian slice, and the per-agent
/// force composition is evaluated in the same order as the serial
/// loop (`driving → grid neighbours in grid order → walls in wall
/// order`). Only the force pass is parallel; the velocity/position
/// writeback remains a single serial O(n) pass, which is
/// order-independent because each agent's update reads only its own
/// state and acceleration.
///
/// The parallel speedup kicks in above ~5 000 agents on typical
/// many-core x86; below that the rayon dispatch cost dominates and
/// [`step_scratch`] wins.
///
/// Enable the `rayon` feature of `rustsim-crowd` to use this entry
/// point. For CPU deployments lacking a CUDA GPU this closes the
/// multi-core gap left by the serial hot path.
#[cfg(feature = "rayon")]
pub fn step_scratch_par(
    peds: &mut [Pedestrian],
    walls: &[WallSegment],
    params: &Params,
    dt: f64,
    scratch: &mut Scratch,
) {
    use rayon::prelude::*;

    let cutoff = neighbor_cutoff(params);
    let inv_mass = 1.0 / params.mass;

    scratch.prepare(peds);
    let (accels, grid) = scratch.split();

    // Shared read-only view for the workers; slot `i` of `accels` is
    // the only thing worker `i` writes.
    let agents: &[Pedestrian] = peds;

    accels.par_iter_mut().enumerate().for_each(|(i, slot)| {
        let p = &agents[i];
        let mut total = driving_force(p, params);
        grid.for_each_neighbor(i, cutoff, agents, |_j, q| {
            total = add(total, ped_repulsion(p, q, params));
        });
        let total = walls
            .iter()
            .fold(total, |acc, w| add(acc, wall_repulsion(p, w, params)));
        *slot = cap_accel(scale(total, inv_mass), params.max_accel);
    });

    // Serial integration: velocity first, then position from the new velocity.
    for (p, a) in peds.iter_mut().zip(accels.iter()) {
        p.vel = clamp_speed(add(p.vel, scale(*a, dt)), params.max_speed);
        p.pos = add(p.pos, scale(p.vel, dt));
    }
}

/// SIMD-vectorised drop-in replacement for [`step_scratch`].
///
/// Lifts [`crate::simd::pair_force_x4`] into the per-agent inner loop:
/// neighbours returned by the broadphase grid are buffered in 4-wide
/// chunks and each chunk's pair-repulsion sum is obtained from one
/// `pair_force_x4` call. Driving force, wall repulsion, `cap_accel`,
/// and the integrator stay scalar — only the inner pair-force sum
/// changes.
///
/// # Numerical contract
///
/// Lane summation re-orders the per-pair contributions, so the SIMD
/// path is **not** bit-exact with [`step_scratch`] — only
/// tolerance-equivalent. `tests/simd_tolerance.rs` pins the bound at
/// `1e-9` per agent over a single tick of a representative
/// counter-flow fixture, matching the unit-test envelope on
/// [`crate::simd::pair_force_x4`] itself. Note that this differs from
/// [`step_scratch_par`], which keeps the serial summation order and
/// is documented as bit-exact.
///
/// Enable the `simd` feature of `rustsim-crowd` to use this entry
/// point. The SIMD speedup is consistently visible on x86_64 SSE/AVX
/// and aarch64 NEON above ~2 000 agents; below that the lane-buffer
/// flush overhead dominates and [`step_scratch`] wins.
#[cfg(feature = "simd")]
pub fn step_scratch_simd(
    peds: &mut [Pedestrian],
    walls: &[WallSegment],
    params: &Params,
    dt: f64,
    scratch: &mut Scratch,
) {
    /// Width of one `pair_force_x4` batch.
    const LANES: usize = 4;

    let cutoff = neighbor_cutoff(params);
    let inv_mass = 1.0 / params.mass;

    scratch.prepare(peds);
    let (accels, grid) = scratch.split();

    // Force pass over a read-only view of the start-of-tick state.
    let agents: &[Pedestrian] = peds;
    for (i, p) in agents.iter().enumerate() {
        let mut total = driving_force(p, params);

        // Neighbour indices waiting to be batched. Indices (not
        // references) are stored so nothing borrowed from the callback
        // argument has to outlive the callback.
        let mut lanes: [Option<usize>; LANES] = [None; LANES];
        let mut pending: usize = 0;
        grid.for_each_neighbor(i, cutoff, agents, |j, _q| {
            lanes[pending] = Some(j);
            pending += 1;
            if pending == LANES {
                // Full batch: every lane is populated.
                let batch: [Option<&Pedestrian>; LANES] =
                    lanes.map(|slot| slot.map(|k| &agents[k]));
                total = add(total, crate::simd::pair_force_x4(p, batch, params));
                lanes = [None; LANES];
                pending = 0;
            }
        });
        if pending > 0 {
            // Trailing partial batch; `None` lanes contribute zero.
            let batch: [Option<&Pedestrian>; LANES] =
                lanes.map(|slot| slot.map(|k| &agents[k]));
            total = add(total, crate::simd::pair_force_x4(p, batch, params));
        }

        total = walls
            .iter()
            .fold(total, |acc, w| add(acc, wall_repulsion(p, w, params)));
        accels[i] = cap_accel(scale(total, inv_mass), params.max_accel);
    }

    // Integration pass: velocity first, then position from the new velocity.
    for (p, a) in peds.iter_mut().zip(accels.iter()) {
        p.vel = clamp_speed(add(p.vel, scale(*a, dt)), params.max_speed);
        p.pos = add(p.pos, scale(p.vel, dt));
    }
}

/// Clamp the magnitude of an acceleration vector to `cap`.
///
/// Applied to every agent's net acceleration before Euler integration
/// to keep the stiff Helbing repulsion numerically stable. See
/// [`Params::max_accel`] for the full rationale.
///
/// Vectors whose magnitude is at most `cap` (including the zero
/// vector) are returned unchanged; longer vectors are rescaled to
/// length `cap` with their direction preserved.
///
/// # Extreme inputs
///
/// The exponential repulsion can produce components so large that
/// `a[0]² + a[1]²` overflows, or that are themselves infinite. Those
/// are exactly the inputs the cap exists for, so they are still
/// clamped instead of collapsing to zero or NaN:
///
/// - finite components whose squares overflow are rescaled through a
///   normalised copy of the vector;
/// - an infinite component dominates any finite one, so the result
///   points along the infinite axis (or the diagonal when both are
///   infinite) with length `cap`;
/// - a NaN component, or a non-finite `cap`, leaves `a` unchanged so
///   the caller can still detect the bad value.
#[inline]
pub fn cap_accel(a: Vec2, cap: f64) -> Vec2 {
    let m = (a[0] * a[0] + a[1] * a[1]).sqrt();
    if m.is_finite() {
        // Ordinary path, bit-identical to the plain clamp.
        return if m > cap && m > 0.0 {
            scale(a, cap / m)
        } else {
            a
        };
    }

    // From here on `m` is +inf or NaN.
    if a[0].is_nan() || a[1].is_nan() || !cap.is_finite() {
        return a;
    }

    let has_infinite = a[0].is_infinite() || a[1].is_infinite();
    let dir: Vec2 = if has_infinite {
        // Keep only the infinite axes, as ±1.
        let axis = |c: f64| if c.is_infinite() { c.signum() } else { 0.0 };
        [axis(a[0]), axis(a[1])]
    } else {
        // Finite but huge: divide by the larger component so the
        // squares below cannot overflow.
        let largest = a[0].abs().max(a[1].abs());
        [a[0] / largest, a[1] / largest]
    };
    // `len` lies in [1, sqrt(2)] by construction.
    let len = (dir[0] * dir[0] + dir[1] * dir[1]).sqrt();

    if !has_infinite {
        // True magnitude is `largest * len`; compare in scaled form so
        // the product itself is never formed.
        let largest = a[0].abs().max(a[1].abs());
        if len <= cap / largest {
            return a;
        }
    }
    scale(dir, cap / len)
}

/// Driving force `f_drive = m * (v0 * e_dest - v) / tau`.
///
/// `e_dest` is the agent's desired direction and `v0` its effective
/// desired speed for `params.arrival_radius`; the force relaxes the
/// current velocity toward `v0 * e_dest` with time constant
/// `params.tau`.
#[inline]
pub fn driving_force(p: &Pedestrian, params: &Params) -> Vec2 {
    let heading = p.desired_direction();
    let v0 = p.effective_desired_speed(params.arrival_radius);
    let velocity_error = sub(scale(heading, v0), p.vel);
    scale(velocity_error, params.mass / params.tau)
}

/// Repulsive force exerted on `p` by `q`:
/// `A * exp((r_ij - d_ij) / B) * e_ij`, where `r_ij = p.radius +
/// q.radius`, `d_ij` is the centre distance and `e_ij` is the unit
/// vector pointing from `q` to `p`.
///
/// Returns the zero vector when the two centres are closer than
/// `1e-9`, because the direction is undefined there.
#[inline]
pub fn ped_repulsion(p: &Pedestrian, q: &Pedestrian, params: &Params) -> Vec2 {
    /// Centre distance below which the pair is treated as coincident.
    const COINCIDENT_EPS: f64 = 1e-9;

    let separation = sub(p.pos, q.pos);
    let dist = norm(separation);
    if dist < COINCIDENT_EPS {
        [0.0, 0.0]
    } else {
        let contact = p.radius + q.radius;
        let direction = scale(separation, 1.0 / dist);
        let strength = params.a_ped * ((contact - dist) / params.b_ped).exp();
        scale(direction, strength)
    }
}

/// Repulsive force exerted on `p` by a wall segment:
/// `A_wall * exp((p.radius - d) / B_wall)` directed from the closest
/// point on the segment toward the agent, where `d` is the distance
/// to that point.
///
/// Returns the zero vector when the agent's centre is within `1e-9`
/// of the segment, because the direction is undefined there.
#[inline]
pub fn wall_repulsion(p: &Pedestrian, wall: &WallSegment, params: &Params) -> Vec2 {
    /// Distance below which the centre is treated as lying on the wall.
    const ON_WALL_EPS: f64 = 1e-9;

    let foot = closest_point_on_segment(p.pos, wall.a, wall.b);
    let away = sub(p.pos, foot);
    let dist = norm(away);
    if dist < ON_WALL_EPS {
        [0.0, 0.0]
    } else {
        let direction = scale(away, 1.0 / dist);
        let strength = params.a_wall * ((p.radius - dist) / params.b_wall).exp();
        scale(direction, strength)
    }
}

// Unit tests for the Social Force model: integrator contract, basic
// single-agent and two-agent behaviour, the acceleration cap, and
// equivalence between the reference, grid, scratch and rayon paths.
#[cfg(test)]
#[allow(deprecated)] // intentional: pins grid/scratch equivalence vs the deprecated O(n²) `step`.
mod tests {
    use super::*;

    /// Agent at rest at the origin (radius 0.25 m, desired speed
    /// 1.34 m/s) heading for `dest`.
    fn single_agent_toward(dest: Vec2) -> Pedestrian {
        Pedestrian {
            pos: [0.0, 0.0],
            vel: [0.0, 0.0],
            radius: 0.25,
            desired_speed: 1.34,
            destination: dest,
        }
    }

    #[test]
    fn integrator_is_semi_implicit_euler() {
        // Pin the integrator contract: the position advance uses the
        // POST-update velocity, not the pre-update one. Setup:
        // * Agent starts at rest at the origin, desired destination
        //   at [+x, 0], no neighbours, no walls.
        // * Single tick of length `dt`.
        //
        // With `tau`, `desired_speed`, `dt` chosen so no clamp fires:
        //   a_x     = desired_speed / tau
        //   v_new_x = 0 + a_x * dt          = desired_speed * dt / tau
        //   p_new_x (symplectic) = 0 + v_new_x * dt = a_x * dt²
        //   p_new_x (explicit)   = 0 + 0     * dt   = 0
        //
        // The symplectic prediction differs from the explicit one by
        // exactly a_x * dt²; a drift-prone explicit integrator would
        // leave the agent at the origin after one tick.
        let mut peds = vec![Pedestrian {
            pos: [0.0, 0.0],
            vel: [0.0, 0.0],
            radius: 0.25,
            desired_speed: 1.34,
            destination: [100.0, 0.0],
        }];
        let params = Params::default();
        let dt = 0.05;
        step(&mut peds, &[], &params, dt);

        // Destination is far (100 m) so the arrival taper is inactive;
        // driving force reduces to `(desired_speed * e_hat - v) / tau`.
        let expected_a_x = peds[0].desired_speed / params.tau;
        let expected_v_x = expected_a_x * dt;
        let expected_p_x = expected_v_x * dt; // symplectic
        let explicit_p_x = 0.0; // pre-update velocity was zero

        // Symplectic move is a_x * dt² above the explicit result.
        assert!(
            peds[0].pos[0] > explicit_p_x + 0.5 * expected_a_x * dt * dt,
            "position advance must use post-update velocity (symplectic Euler), got p_x = {}",
            peds[0].pos[0]
        );
        assert!(
            (peds[0].pos[0] - expected_p_x).abs() < 1e-12,
            "symplectic Euler position should equal a_x * dt² = {}, got {}",
            expected_p_x,
            peds[0].pos[0]
        );
        assert!(
            (peds[0].vel[0] - expected_v_x).abs() < 1e-12,
            "velocity should equal a_x * dt = {}, got {}",
            expected_v_x,
            peds[0].vel[0]
        );
    }

    #[test]
    fn single_agent_moves_toward_destination() {
        // 100 ticks of 0.05 s (5 s simulated) with no neighbours or
        // walls: the agent must advance along +x without drifting in y.
        let mut peds = vec![single_agent_toward([10.0, 0.0])];
        for _ in 0..100 {
            step(&mut peds, &[], &Params::default(), 0.05);
        }
        assert!(peds[0].pos[0] > 1.0, "agent should have advanced in +x");
        assert!(peds[0].pos[1].abs() < 0.05, "no lateral drift");
    }

    #[test]
    fn two_agents_head_on_do_not_overlap() {
        // Two agents swap ends of a 10 m corridor with a 0.1 m lateral
        // offset, for 400 ticks of 0.02 s (8 s simulated).
        // NOTE(review): the distance is asserted only on the final
        // state, after the loop; the separation during the encounter
        // itself is not checked. Confirm whether a per-tick minimum
        // distance check is intended.
        let mut peds = vec![
            Pedestrian {
                pos: [-5.0, 0.05],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 1.34,
                destination: [5.0, 0.05],
            },
            Pedestrian {
                pos: [5.0, -0.05],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 1.34,
                destination: [-5.0, -0.05],
            },
        ];
        for _ in 0..400 {
            step(&mut peds, &[], &Params::default(), 0.02);
        }
        let dx = peds[0].pos[0] - peds[1].pos[0];
        let dy = peds[0].pos[1] - peds[1].pos[1];
        let d = (dx * dx + dy * dy).sqrt();
        assert!(
            d >= peds[0].radius + peds[1].radius,
            "agents overlapped: d={d}"
        );
    }

    #[test]
    fn trait_impl_reports_name() {
        // The trait object path must expose the model's display name.
        let m = SocialForce;
        assert_eq!(m.name(), "Social Force");
    }

    #[test]
    fn cap_accel_clamps_magnitude() {
        let a = cap_accel([30.0, 40.0], 10.0); // |a|=50
        let m = (a[0] * a[0] + a[1] * a[1]).sqrt();
        assert!((m - 10.0).abs() < 1e-12);
        // Direction preserved.
        assert!((a[0] / a[1] - 30.0 / 40.0).abs() < 1e-12);
        // Below cap is a no-op.
        let b = cap_accel([3.0, 4.0], 10.0);
        assert_eq!(b, [3.0, 4.0]);
    }

    #[test]
    fn agent_settles_inside_arrival_radius() {
        // Walk straight at a destination; after enough ticks the agent
        // must sit inside the arrival radius without overshooting past
        // the goal. Without the arrival taper the agent would overshoot
        // and keep turning around.
        let mut peds = vec![Pedestrian {
            pos: [0.0, 0.0],
            vel: [0.0, 0.0],
            radius: 0.25,
            desired_speed: 1.34,
            destination: [5.0, 0.0],
        }];
        let params = Params::default();
        // Largest signed distance past the goal along +x seen on any tick.
        let mut max_overshoot: f64 = 0.0;
        for _ in 0..1000 {
            step(&mut peds, &[], &params, 0.05);
            let overshoot = peds[0].pos[0] - peds[0].destination[0];
            if overshoot > max_overshoot {
                max_overshoot = overshoot;
            }
        }
        assert!(peds[0].has_arrived(params.arrival_radius + 1e-6));
        assert!(
            max_overshoot <= params.arrival_radius + 1e-3,
            "agent overshot destination by {max_overshoot:.3} m (arrival_radius={})",
            params.arrival_radius
        );
    }

    #[test]
    fn stiff_pair_does_not_blow_up_with_accel_cap() {
        // Place two agents with heavy overlap so the raw Helbing
        // repulsion is >> max_accel * m. Without the cap, explicit
        // Euler would push velocity to double-digit m/s in one tick.
        let mut peds = vec![
            Pedestrian {
                pos: [0.0, 0.0],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 0.0,
                destination: [0.0, 0.0],
            },
            Pedestrian {
                pos: [0.1, 0.0], // 0.4 m overlap
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 0.0,
                destination: [0.0, 0.0],
            },
        ];
        let params = Params::default();
        step(&mut peds, &[], &params, 0.05);
        // After one tick: |v| must be bounded by max_accel * dt = 1.0 m/s,
        // then further by max_speed = 2.5 m/s. Without the cap the raw
        // force (~2 kN * e^(0.4/0.08)) / 80 kg * dt ≈ hundreds of m/s.
        for p in &peds {
            let v = (p.vel[0] * p.vel[0] + p.vel[1] * p.vel[1]).sqrt();
            assert!(
                v <= params.max_accel * 0.05 + 1e-9,
                "velocity {v} exceeded max_accel*dt={} — accel cap not applied",
                params.max_accel * 0.05
            );
        }
    }

    #[test]
    fn step_with_grid_matches_step_within_tolerance() {
        // Seedable deterministic scatter of 32 agents in a 6 x 6 box.
        let mut a: Vec<Pedestrian> = Vec::new();
        for k in 0..32 {
            let x = ((k * 2654435761u64) % 6_000_000) as f64 / 1_000_000.0;
            let y = ((k * 40503u64) % 6_000_000) as f64 / 1_000_000.0;
            a.push(Pedestrian {
                pos: [x, y],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 1.2,
                destination: [x + 5.0, y],
            });
        }
        let mut b = a.clone();
        let params = Params::default();
        let mut grid = crate::broadphase::NeighborGrid::new(neighbor_cutoff(&params));

        // `a` follows the O(n²) reference path, `b` the grid path; the
        // grid is rebuilt from `b`'s positions before each grid step.
        for _ in 0..50 {
            step(&mut a, &[], &params, 0.05);
            grid.rebuild(&b);
            step_with_grid(&mut b, &[], &params, 0.05, &grid);
        }

        // After 50 steps the two paths must agree to within a tight
        // tolerance: only pairs outside the cutoff differ, and at
        // defaults their force is below 1 mN.
        for i in 0..a.len() {
            let dx = a[i].pos[0] - b[i].pos[0];
            let dy = a[i].pos[1] - b[i].pos[1];
            let d = (dx * dx + dy * dy).sqrt();
            assert!(
                d < 1e-3,
                "agent {i}: grid path diverged from O(n^2) by {d:.3e} m"
            );
        }
    }
    #[test]
    fn step_scratch_matches_step_with_grid_bit_exact() {
        // `step_scratch` is algebraically identical to `step_with_grid`
        // — it only eliminates per-tick allocations. The two trajectories
        // must therefore agree to machine precision on the same fixture.
        let mut a: Vec<Pedestrian> = Vec::new();
        for k in 0..24 {
            let x = ((k * 2654435761u64) % 6_000_000) as f64 / 1_000_000.0;
            let y = ((k * 40503u64) % 6_000_000) as f64 / 1_000_000.0;
            a.push(Pedestrian {
                pos: [x, y],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 1.2,
                destination: [x + 5.0, y],
            });
        }
        let mut b = a.clone();
        let params = Params::default();
        let cutoff = neighbor_cutoff(&params);
        let mut grid = crate::broadphase::NeighborGrid::new(cutoff);
        let mut scratch = crate::broadphase::Scratch::with_capacity(a.len(), cutoff);

        // `a` uses an explicitly rebuilt grid, `b` the scratch path.
        for _ in 0..40 {
            grid.rebuild(&a);
            step_with_grid(&mut a, &[], &params, 0.05, &grid);
            step_scratch(&mut b, &[], &params, 0.05, &mut scratch);
        }

        // Exact `==` on `[f64; 2]`: no tolerance is allowed here.
        for i in 0..a.len() {
            assert_eq!(a[i].pos, b[i].pos, "scratch path diverged at {i}");
            assert_eq!(a[i].vel, b[i].vel);
        }
    }

    #[cfg(feature = "rayon")]
    #[test]
    fn step_scratch_par_matches_step_scratch_bit_exact() {
        // Pin the parallel contract: `step_scratch_par` must produce
        // bit-exact (Rust `==` on `[f64; 2]`) trajectories vs the
        // serial `step_scratch`. Each rayon worker writes its own
        // `accels[i]` slot from an immutable view of `peds`, so no
        // non-associative float reduction can sneak in; the only
        // ordering choice is deterministic (grid-major).
        let mut a: Vec<Pedestrian> = Vec::new();
        for k in 0..64 {
            let x = ((k * 2654435761u64) % 6_000_000) as f64 / 1_000_000.0;
            let y = ((k * 40503u64) % 6_000_000) as f64 / 1_000_000.0;
            a.push(Pedestrian {
                pos: [x, y],
                vel: [0.0, 0.0],
                radius: 0.25,
                desired_speed: 1.2,
                destination: [x + 5.0, y],
            });
        }
        let mut b = a.clone();
        // A single wall below the scatter so the wall term is exercised too.
        let walls = vec![WallSegment {
            a: [-1.0, -1.0],
            b: [20.0, -1.0],
        }];
        let params = Params::default();
        let cutoff = neighbor_cutoff(&params);
        let mut scratch_a = crate::broadphase::Scratch::with_capacity(a.len(), cutoff);
        let mut scratch_b = crate::broadphase::Scratch::with_capacity(b.len(), cutoff);

        for _ in 0..40 {
            step_scratch(&mut a, &walls, &params, 0.05, &mut scratch_a);
            step_scratch_par(&mut b, &walls, &params, 0.05, &mut scratch_b);
        }

        for i in 0..a.len() {
            assert_eq!(
                a[i].pos, b[i].pos,
                "parallel path diverged in position at {i}"
            );
            assert_eq!(
                a[i].vel, b[i].vel,
                "parallel path diverged in velocity at {i}"
            );
        }
    }
}