#![cfg(feature = "simd")]
use wide::{f64x4, CmpGt};
use crate::common::Pedestrian;
use crate::social_force::Params;
#[inline]
fn one_mask(mask: wide::f64x4) -> f64x4 {
mask & f64x4::splat(f64::from_bits(0x3FF0_0000_0000_0000))
}
#[inline]
pub fn pair_force_x4(
p: &Pedestrian,
neighbours: [Option<&Pedestrian>; 4],
params: &Params,
) -> [f64; 2] {
let pxi = f64x4::splat(p.pos[0]);
let pyi = f64x4::splat(p.pos[1]);
let ri = f64x4::splat(p.radius);
let a_ped = f64x4::splat(params.a_ped);
let b_ped = f64x4::splat(params.b_ped);
let mut qxs = [0.0f64; 4];
let mut qys = [0.0f64; 4];
let mut qrs = [0.0f64; 4];
let mut valid = [0.0f64; 4];
for (k, slot) in neighbours.iter().enumerate() {
if let Some(q) = slot {
qxs[k] = q.pos[0];
qys[k] = q.pos[1];
qrs[k] = q.radius;
valid[k] = 1.0;
} else {
qxs[k] = p.pos[0] + 1.0e9;
qys[k] = p.pos[1];
qrs[k] = 0.0;
valid[k] = 0.0;
}
}
let qx = f64x4::new(qxs);
let qy = f64x4::new(qys);
let qr = f64x4::new(qrs);
let mask = f64x4::new(valid);
let dx = pxi - qx;
let dy = pyi - qy;
let d2 = dx * dx + dy * dy;
let d = d2.sqrt();
let eps = f64x4::splat(1.0e-9);
let nondegen = one_mask(d.cmp_gt(eps));
let r_sum = ri + qr;
let inv_d = f64x4::splat(1.0) / (d + f64x4::splat(1.0e-300)); let ex = dx * inv_d;
let ey = dy * inv_d;
let arg = (r_sum - d) / b_ped;
let mag = a_ped * arg.exp();
let mag = mag * mask * nondegen;
let fx = mag * ex;
let fy = mag * ey;
let fx_arr = fx.to_array();
let fy_arr = fy.to_array();
let sum_fx = fx_arr[0] + fx_arr[1] + fx_arr[2] + fx_arr[3];
let sum_fy = fy_arr[0] + fy_arr[1] + fy_arr[2] + fy_arr[3];
[sum_fx, sum_fy]
}
#[inline]
pub fn gcf_pair_force_x4(
p: &Pedestrian,
neighbours: [Option<&Pedestrian>; 4],
params: &crate::generalized_centrifugal_force::Params,
) -> [f64; 2] {
use crate::common::norm;
let pxi = f64x4::splat(p.pos[0]);
let pyi = f64x4::splat(p.pos[1]);
let pvxi = f64x4::splat(p.vel[0]);
let pvyi = f64x4::splat(p.vel[1]);
let r_i = f64x4::splat(params.a + params.b * norm(p.vel));
let v0 = f64x4::splat(p.desired_speed);
let mass = f64x4::splat(params.mass);
let min_clearance = f64x4::splat(params.min_clearance);
let mut qxs = [0.0f64; 4];
let mut qys = [0.0f64; 4];
let mut qvxs = [0.0f64; 4];
let mut qvys = [0.0f64; 4];
let mut rjs = [0.0f64; 4];
let mut valid = [0.0f64; 4];
for (k, slot) in neighbours.iter().enumerate() {
if let Some(q) = slot {
qxs[k] = q.pos[0];
qys[k] = q.pos[1];
qvxs[k] = q.vel[0];
qvys[k] = q.vel[1];
rjs[k] = params.a + params.b * norm(q.vel);
valid[k] = 1.0;
} else {
qxs[k] = p.pos[0] + 1.0e9;
qys[k] = p.pos[1];
qvxs[k] = 0.0;
qvys[k] = 0.0;
rjs[k] = 0.0;
valid[k] = 0.0;
}
}
let qx = f64x4::new(qxs);
let qy = f64x4::new(qys);
let qvx = f64x4::new(qvxs);
let qvy = f64x4::new(qvys);
let r_j = f64x4::new(rjs);
let mask = f64x4::new(valid);
let dx = pxi - qx;
let dy = pyi - qy;
let d2 = dx * dx + dy * dy;
let d = d2.sqrt();
let eps = f64x4::splat(1.0e-9);
let nondegen = one_mask(d.cmp_gt(eps));
let inv_d = f64x4::splat(1.0) / (d + f64x4::splat(1.0e-300));
let ex = dx * inv_d;
let ey = dy * inv_d;
let vrx = pvxi - qvx;
let vry = pvyi - qvy;
let neg_dot = -(vrx * ex + vry * ey);
let pos_mask = one_mask(neg_dot.cmp_gt(f64x4::splat(0.0)));
let approach = neg_dot * pos_mask;
let raw_clearance = d - r_i - r_j;
let above = one_mask(raw_clearance.cmp_gt(min_clearance));
let one = f64x4::splat(1.0);
let clearance = raw_clearance * above + min_clearance * (one - above);
let speed_term = v0 + approach;
let mag = mass * speed_term * speed_term / clearance;
let mag = mag * mask * nondegen;
let fx = mag * ex;
let fy = mag * ey;
let fx_arr = fx.to_array();
let fy_arr = fy.to_array();
[
fx_arr[0] + fx_arr[1] + fx_arr[2] + fx_arr[3],
fy_arr[0] + fy_arr[1] + fy_arr[2] + fy_arr[3],
]
}
#[inline]
pub fn cfs_direction_x4(
p: &Pedestrian,
neighbours: [Option<&Pedestrian>; 4],
params: &crate::collision_free_speed::Params,
) -> [f64; 2] {
let pxi = f64x4::splat(p.pos[0]);
let pyi = f64x4::splat(p.pos[1]);
let ri = f64x4::splat(p.radius);
let strength = f64x4::splat(params.interaction_strength);
let range = f64x4::splat(params.interaction_range);
let mut qxs = [0.0f64; 4];
let mut qys = [0.0f64; 4];
let mut qrs = [0.0f64; 4];
let mut valid = [0.0f64; 4];
for (k, slot) in neighbours.iter().enumerate() {
if let Some(q) = slot {
qxs[k] = q.pos[0];
qys[k] = q.pos[1];
qrs[k] = q.radius;
valid[k] = 1.0;
} else {
qxs[k] = p.pos[0] + 1.0e9;
qys[k] = p.pos[1];
qrs[k] = 0.0;
valid[k] = 0.0;
}
}
let qx = f64x4::new(qxs);
let qy = f64x4::new(qys);
let qr = f64x4::new(qrs);
let lane_valid = f64x4::new(valid);
let dx = pxi - qx;
let dy = pyi - qy;
let d = (dx * dx + dy * dy).sqrt();
let nondegen = one_mask(d.cmp_gt(f64x4::splat(1.0e-9)));
let inv_d = f64x4::splat(1.0) / (d + f64x4::splat(1.0e-300));
let ex = dx * inv_d;
let ey = dy * inv_d;
let raw_clearance = d - (ri + qr);
let positive = one_mask(raw_clearance.cmp_gt(f64x4::splat(0.0)));
let clearance = raw_clearance * positive;
let weight = strength * (-(clearance / range)).exp() * lane_valid * nondegen;
let fx = (weight * ex).to_array();
let fy = (weight * ey).to_array();
[fx[0] + fx[1] + fx[2] + fx[3], fy[0] + fy[1] + fy[2] + fy[3]]
}
#[inline]
pub fn cfs_headroom_x4(p: &Pedestrian, e: [f64; 2], neighbours: [Option<&Pedestrian>; 4]) -> f64 {
let pxi = f64x4::splat(p.pos[0]);
let pyi = f64x4::splat(p.pos[1]);
let ex = f64x4::splat(e[0]);
let ey = f64x4::splat(e[1]);
let ri = f64x4::splat(p.radius);
let mut qxs = [0.0f64; 4];
let mut qys = [0.0f64; 4];
let mut qrs = [0.0f64; 4];
let mut valid = [false; 4];
for (k, slot) in neighbours.iter().enumerate() {
if let Some(q) = slot {
qxs[k] = q.pos[0];
qys[k] = q.pos[1];
qrs[k] = q.radius;
valid[k] = true;
} else {
qxs[k] = p.pos[0] + 1.0e9;
qys[k] = p.pos[1];
qrs[k] = 0.0;
}
}
let rel_x = f64x4::new(qxs) - pxi;
let rel_y = f64x4::new(qys) - pyi;
let forward = rel_x * ex + rel_y * ey;
let proj_x = ex * forward;
let proj_y = ey * forward;
let lat_x = rel_x - proj_x;
let lat_y = rel_y - proj_y;
let lat = (lat_x * lat_x + lat_y * lat_y).sqrt();
let d = (rel_x * rel_x + rel_y * rel_y).sqrt();
let r_sum = ri + f64x4::new(qrs);
let forward = forward.to_array();
let lat = lat.to_array();
let d = d.to_array();
let r_sum = r_sum.to_array();
let mut best = f64::INFINITY;
for k in 0..4 {
if valid[k] && forward[k] > 0.0 && lat[k] <= r_sum[k] && d[k] < best {
best = d[k];
}
}
best
}
#[inline]
pub fn avm_headroom_x4(
p: &Pedestrian,
e: [f64; 2],
neighbours: [Option<&Pedestrian>; 4],
params: &crate::anticipation_velocity::Params,
) -> f64 {
let pxi = f64x4::splat(p.pos[0]);
let pyi = f64x4::splat(p.pos[1]);
let ex = f64x4::splat(e[0]);
let ey = f64x4::splat(e[1]);
let ri = f64x4::splat(p.radius);
let anticipation_time = f64x4::splat(params.anticipation_time);
let mut qxs = [0.0f64; 4];
let mut qys = [0.0f64; 4];
let mut qvxs = [0.0f64; 4];
let mut qvys = [0.0f64; 4];
let mut qrs = [0.0f64; 4];
let mut valid = [false; 4];
for (k, slot) in neighbours.iter().enumerate() {
if let Some(q) = slot {
qxs[k] = q.pos[0];
qys[k] = q.pos[1];
qvxs[k] = q.vel[0];
qvys[k] = q.vel[1];
qrs[k] = q.radius;
valid[k] = true;
} else {
qxs[k] = p.pos[0] + 1.0e9;
qys[k] = p.pos[1];
qvxs[k] = 0.0;
qvys[k] = 0.0;
qrs[k] = 0.0;
}
}
let q_future_x = f64x4::new(qxs) + f64x4::new(qvxs) * anticipation_time;
let q_future_y = f64x4::new(qys) + f64x4::new(qvys) * anticipation_time;
let rel_x = q_future_x - pxi;
let rel_y = q_future_y - pyi;
let forward = rel_x * ex + rel_y * ey;
let proj_x = ex * forward;
let proj_y = ey * forward;
let lat_x = rel_x - proj_x;
let lat_y = rel_y - proj_y;
let lat = (lat_x * lat_x + lat_y * lat_y).sqrt();
let d = (rel_x * rel_x + rel_y * rel_y).sqrt();
let r_sum = ri + f64x4::new(qrs);
let forward = forward.to_array();
let lat = lat.to_array();
let d = d.to_array();
let r_sum = r_sum.to_array();
let mut best = f64::INFINITY;
for k in 0..4 {
if valid[k] && forward[k] > 0.0 && lat[k] <= r_sum[k] && d[k] < best {
best = d[k];
}
}
best
}
#[cfg(test)]
mod tests {
use super::*;
use crate::common::Pedestrian;
use crate::social_force::ped_repulsion;
fn fixture() -> (Pedestrian, [Pedestrian; 4]) {
let p = Pedestrian::new([0.0, 0.0], [0.5, 0.0], 0.25, 1.34, [50.0, 0.0]);
let q = [
Pedestrian::new([0.4, 0.1], [0.0, 0.0], 0.20, 1.34, [50.0, 0.0]),
Pedestrian::new([-0.6, 0.05], [0.0, 0.0], 0.30, 1.34, [50.0, 0.0]),
Pedestrian::new([0.0, 1.5], [0.0, 0.0], 0.25, 1.34, [50.0, 0.0]),
Pedestrian::new([3.0, -2.5], [0.0, 0.0], 0.25, 1.34, [50.0, 0.0]),
];
(p, q)
}
#[test]
fn simd_pair_force_matches_scalar_within_tolerance() {
let (p, qs) = fixture();
let params = Params::default();
let scalar: [f64; 2] = qs
.iter()
.map(|q| ped_repulsion(&p, q, ¶ms))
.fold([0.0, 0.0], |acc, f| [acc[0] + f[0], acc[1] + f[1]]);
let simd = pair_force_x4(
&p,
[Some(&qs[0]), Some(&qs[1]), Some(&qs[2]), Some(&qs[3])],
¶ms,
);
let dx = (simd[0] - scalar[0]).abs();
let dy = (simd[1] - scalar[1]).abs();
assert!(
dx < 1.0e-9 && dy < 1.0e-9,
"SIMD pair force diverged from scalar baseline: scalar={:?} simd={:?} (dx={dx}, dy={dy})",
scalar,
simd
);
}
#[test]
fn simd_pair_force_with_partial_chunk_zeroes_invalid_lanes() {
let (p, qs) = fixture();
let params = Params::default();
let scalar: [f64; 2] = qs
.iter()
.take(2)
.map(|q| ped_repulsion(&p, q, ¶ms))
.fold([0.0, 0.0], |acc, f| [acc[0] + f[0], acc[1] + f[1]]);
let simd = pair_force_x4(&p, [Some(&qs[0]), Some(&qs[1]), None, None], ¶ms);
let dx = (simd[0] - scalar[0]).abs();
let dy = (simd[1] - scalar[1]).abs();
assert!(
dx < 1.0e-9 && dy < 1.0e-9,
"SIMD partial-chunk diverged: scalar={:?} simd={:?} (dx={dx}, dy={dy})",
scalar,
simd
);
}
#[test]
fn simd_pair_force_with_self_overlap_returns_zero() {
let p = Pedestrian::new([1.0, 1.0], [0.0, 0.0], 0.25, 1.34, [50.0, 0.0]);
let q = Pedestrian::new([1.0, 1.0], [0.0, 0.0], 0.25, 1.34, [50.0, 0.0]);
let params = Params::default();
let simd = pair_force_x4(&p, [Some(&q), None, None, None], ¶ms);
assert!(
simd[0].abs() < 1.0e-12 && simd[1].abs() < 1.0e-12,
"SIMD self-overlap should be zero, got {:?}",
simd
);
}
#[test]
fn simd_gcf_pair_force_matches_scalar_within_tolerance() {
use crate::generalized_centrifugal_force::{ped_force, Params as GcfParams};
let p = Pedestrian::new([0.0, 0.0], [1.0, 0.0], 0.25, 1.34, [50.0, 0.0]);
let qs = [
Pedestrian::new([0.7, 0.05], [-0.8, 0.0], 0.25, 1.34, [-50.0, 0.0]),
Pedestrian::new([-0.6, 0.1], [1.2, 0.0], 0.30, 1.34, [50.0, 0.0]),
Pedestrian::new([0.0, 1.5], [0.0, -0.5], 0.25, 1.34, [50.0, -50.0]),
Pedestrian::new([3.0, -2.5], [0.0, 0.0], 0.25, 1.34, [-50.0, 50.0]),
];
let params = GcfParams::default();
let scalar: [f64; 2] = qs
.iter()
.map(|q| ped_force(&p, q, ¶ms))
.fold([0.0, 0.0], |acc, f| [acc[0] + f[0], acc[1] + f[1]]);
let simd = gcf_pair_force_x4(
&p,
[Some(&qs[0]), Some(&qs[1]), Some(&qs[2]), Some(&qs[3])],
¶ms,
);
let dx = (simd[0] - scalar[0]).abs();
let dy = (simd[1] - scalar[1]).abs();
assert!(
dx < 1.0e-9 && dy < 1.0e-9,
"SIMD GCF pair force diverged from scalar: scalar={:?} simd={:?} (dx={dx}, dy={dy})",
scalar,
simd
);
}
#[test]
fn simd_gcf_pair_force_with_partial_chunk_zeroes_invalid_lanes() {
use crate::generalized_centrifugal_force::{ped_force, Params as GcfParams};
let p = Pedestrian::new([0.0, 0.0], [1.0, 0.0], 0.25, 1.34, [50.0, 0.0]);
let qs = [
Pedestrian::new([0.7, 0.05], [-0.8, 0.0], 0.25, 1.34, [-50.0, 0.0]),
Pedestrian::new([-0.6, 0.1], [1.2, 0.0], 0.30, 1.34, [50.0, 0.0]),
];
let params = GcfParams::default();
let scalar: [f64; 2] = qs
.iter()
.map(|q| ped_force(&p, q, ¶ms))
.fold([0.0, 0.0], |acc, f| [acc[0] + f[0], acc[1] + f[1]]);
let simd = gcf_pair_force_x4(&p, [Some(&qs[0]), Some(&qs[1]), None, None], ¶ms);
let dx = (simd[0] - scalar[0]).abs();
let dy = (simd[1] - scalar[1]).abs();
assert!(
dx < 1.0e-9 && dy < 1.0e-9,
"SIMD GCF partial-chunk diverged: scalar={:?} simd={:?} (dx={dx}, dy={dy})",
scalar,
simd
);
}
}