#![cfg_attr(feature = "simd", feature(portable_simd))]
#![cfg_attr(not(test), no_std)]
#![warn(clippy::pedantic, clippy::cargo, clippy::nursery, missing_docs)]
extern crate alloc;
use aligned_vec::{ABox, AVec, RuntimeAlign};
use alloc::{boxed::Box, vec, vec::Vec};
use core::{
alloc::Layout,
array,
cmp::max,
fmt::Debug,
mem::size_of,
ops::{Add, Sub},
};
#[cfg(feature = "simd")]
use core::{
ops::{AddAssign, Mul, SubAssign},
simd::{
Mask, Simd, SimdElement,
cmp::{SimdPartialEq, SimdPartialOrd},
ptr::SimdConstPtr,
},
};
pub trait Axis: PartialOrd + Copy + Sub<Output = Self> + Add<Output = Self> {
const ZERO: Self;
const INFINITY: Self;
const NEG_INFINITY: Self;
#[must_use]
fn is_finite(self) -> bool;
#[must_use]
fn in_between(self, rhs: Self) -> Self;
#[must_use]
fn square(self) -> Self;
}
#[cfg(feature = "simd")]
pub trait AxisSimdElement: SimdElement + Default + Axis {}
#[cfg(feature = "simd")]
pub trait AxisSimd<const L: usize>:
Sized
+ SimdPartialOrd
+ Add<Output = Self>
+ AddAssign
+ Sub<Output = Self>
+ SubAssign
+ Mul<Output = Self>
{
fn cast_mask(mask: <Self as SimdPartialEq>::Mask) -> Mask<isize, L>;
fn mask_any(mask: <Self as SimdPartialEq>::Mask) -> bool;
}
pub trait Index: TryFrom<usize> + TryInto<usize> + Copy {
const ZERO: Self;
}
#[cfg(feature = "simd")]
pub trait IndexSimd: SimdElement + Default {
#[must_use]
unsafe fn to_simd_usize_unchecked<const L: usize>(x: Simd<Self, L>) -> Simd<usize, L>;
}
macro_rules! impl_axis {
($t: ty, $tm: ty) => {
impl Axis for $t {
const ZERO: Self = 0.0;
const INFINITY: Self = <$t>::INFINITY;
const NEG_INFINITY: Self = <$t>::NEG_INFINITY;
fn is_finite(self) -> bool {
<$t>::is_finite(self)
}
fn in_between(self, rhs: Self) -> Self {
(self + rhs) / 2.0
}
fn square(self) -> Self {
self * self
}
}
#[cfg(feature = "simd")]
impl AxisSimdElement for $t {}
#[cfg(feature = "simd")]
impl<const L: usize> AxisSimd<L> for Simd<$t, L> {
fn cast_mask(mask: <Self as SimdPartialEq>::Mask) -> Mask<isize, L> {
mask.into()
}
fn mask_any(mask: <Self as SimdPartialEq>::Mask) -> bool {
mask.any()
}
}
};
}
macro_rules! impl_idx {
($t: ty) => {
impl Index for $t {
const ZERO: Self = 0;
}
#[cfg(feature = "simd")]
impl IndexSimd for $t {
unsafe fn to_simd_usize_unchecked<const L: usize>(x: Simd<Self, L>) -> Simd<usize, L> {
unsafe { x.to_array().map(|a| a.try_into().unwrap_unchecked()).into() }
}
}
};
}
impl_axis!(f32, i32);
impl_axis!(f64, i64);
impl_idx!(u8);
impl_idx!(u16);
impl_idx!(u32);
impl_idx!(u64);
impl_idx!(usize);
fn clamp<A: PartialOrd>(x: A, min: A, max: A) -> A {
if x < min {
min
} else if x > max {
max
} else {
x
}
}
#[inline]
#[allow(clippy::cast_possible_wrap)]
#[cfg(feature = "simd")]
fn forward_pass_simd<A, const K: usize, const L: usize>(
tests: &[A],
centers: &[Simd<A, L>; K],
) -> Simd<isize, L>
where
Simd<A, L>: AxisSimd<L>,
A: AxisSimdElement,
{
let mut test_idxs: Simd<isize, L> = Simd::splat(0);
let mut k = 0;
for _ in 0..tests.len().trailing_ones() {
let test_ptrs = Simd::splat(tests.as_ptr()).wrapping_offset(test_idxs);
let relevant_tests: Simd<A, L> = unsafe { Simd::gather_ptr(test_ptrs) };
let cmp_results: Mask<isize, L> =
Simd::<A, L>::cast_mask(centers[k % K].simd_ge(relevant_tests));
let one = Simd::splat(1);
test_idxs = (test_idxs << one) + one + (cmp_results.to_simd() & Simd::splat(1));
k = (k + 1) % K;
}
test_idxs - Simd::splat(tests.len() as isize)
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[allow(clippy::module_name_repetitions)]
pub struct Capt<const K: usize, A = f32, I = usize> {
tests: Box<[A]>,
aabbs: Box<[Aabb<A, K>]>,
starts: Box<[I]>,
afforded: [ABox<[A], RuntimeAlign>; K],
r_point: A,
lanes_log2: u32,
}
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Aabb<A, const K: usize> {
pub lo: [A; K],
pub hi: [A; K],
}
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum NewCaptError {
TooManyPoints,
NonFinite,
InvalidLaneCount,
}
impl<A, I, const K: usize> Capt<K, A, I>
where
A: Axis,
I: Index,
{
pub fn new(points: &[[A; K]], r_range: (A, A), n_lanes: usize) -> Self {
Self::try_new(points, r_range, n_lanes)
.expect("index type I must be able to support all points in CAPT during construction")
}
pub fn with_point_radius(
points: &[[A; K]],
r_range: (A, A),
r_point: A,
n_lanes: usize,
) -> Self {
Self::try_with_point_radius(points, r_range, r_point, n_lanes)
.expect("index type I must be able to support all points in CAPT during construction")
}
pub fn try_new(
points: &[[A; K]],
r_range: (A, A),
n_lanes: usize,
) -> Result<Self, NewCaptError> {
Self::try_with_point_radius(points, r_range, A::ZERO, n_lanes)
}
pub fn try_with_point_radius(
points: &[[A; K]],
r_range: (A, A),
r_point: A,
n_lanes: usize,
) -> Result<Self, NewCaptError> {
let n2 = points.len().next_power_of_two();
if !n_lanes.is_power_of_two() {
return Err(NewCaptError::InvalidLaneCount);
}
let lanes_log2 = n_lanes.ilog2();
if points.iter().any(|p| p.iter().any(|x| !x.is_finite())) {
return Err(NewCaptError::NonFinite);
}
let mut tests = vec![A::INFINITY; n2 - 1].into_boxed_slice();
let mut points2 = vec![[A::INFINITY; K]; n2].into_boxed_slice();
points2[..points.len()].copy_from_slice(points);
let layout = Layout::array::<A>(n_lanes).map_err(|_| NewCaptError::InvalidLaneCount)?;
let alignment = max(layout.size(), layout.align());
let mut afforded = array::from_fn(|_| AVec::with_capacity(alignment, n2 * 100));
let mut starts = vec![I::ZERO; n2 + 1].into_boxed_slice();
let mut aabbs = vec![
Aabb {
lo: [A::NEG_INFINITY; K],
hi: [A::INFINITY; K],
};
n2
]
.into_boxed_slice();
unsafe {
Self::new_help(
&mut points2,
&mut tests,
&mut aabbs,
&mut afforded,
&mut starts,
0,
0,
r_range,
lanes_log2,
Vec::new(),
Aabb::ALL,
)?;
}
Ok(Self {
tests,
starts,
afforded: afforded.map(AVec::into_boxed_slice),
aabbs,
r_point,
lanes_log2,
})
}
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
unsafe fn new_help(
points: &mut [[A; K]],
tests: &mut [A],
aabbs: &mut [Aabb<A, K>],
afforded: &mut [AVec<A, RuntimeAlign>; K],
starts: &mut [I],
k: usize,
i: usize,
r_range: (A, A),
lanes_log2: u32,
in_range: Vec<[A; K]>,
cell: Aabb<A, K>,
) -> Result<(), NewCaptError> {
let lanes_mask = (1 << lanes_log2) - 1;
unsafe {
let rsq_min = r_range.0.square();
if let [rep] = *points {
let z = i - tests.len();
let aabb = &mut aabbs[z];
*aabb = Aabb { lo: rep, hi: rep };
if rep[0].is_finite() {
for k in 0..K {
afforded[k].push(rep[k]);
}
let mut n_points_added = 1;
if !cell.contained_by_ball(&rep, rsq_min) {
for ak in afforded.iter_mut() {
ak.reserve(ak.len() + in_range.len());
}
for p in in_range {
aabb.insert(&p);
for k in 0..K {
afforded[k].push(p[k]);
}
n_points_added += 1;
}
}
while n_points_added & lanes_mask != 0 {
for aff in afforded.iter_mut() {
aff.push(A::INFINITY);
}
n_points_added += 1;
}
assert!(afforded[0].len() & lanes_mask == 0);
for k in 0..(K - 1) {
assert_eq!(afforded[k].len(), afforded[k + 1].len());
}
}
starts[z + 1] = afforded[0]
.len()
.try_into()
.map_err(|_| NewCaptError::TooManyPoints)?;
let is_misaligned = (starts[z + 1])
.try_into()
.is_ok_and(|start| (start & lanes_mask) != 0);
assert!(
!is_misaligned,
"start indices for affordance bufs must be aligned"
);
return Ok(());
}
let test = median_partition(points, k);
tests[i] = test;
let (lhs, rhs) = points.split_at_mut(points.len() / 2);
let (lo_vol, hi_vol) = cell.split(test, k);
let lo_too_small = distsq(lo_vol.lo, lo_vol.hi) <= rsq_min;
let hi_too_small = distsq(hi_vol.lo, hi_vol.hi) <= rsq_min;
let (lo_afford, hi_afford) = match (lo_too_small, hi_too_small) {
(false, false) => {
let mut lo_afford = in_range;
let mut hi_afford = lo_afford.clone();
lo_afford.retain(|pt| pt[k] <= test + r_range.1);
lo_afford.extend(rhs.iter().filter(|pt| pt[k] <= test + r_range.1));
hi_afford.retain(|pt| pt[k].is_finite() && test - r_range.1 <= pt[k]);
hi_afford.extend(
lhs.iter()
.filter(|pt| pt[k].is_finite() && test - r_range.1 <= pt[k]),
);
(lo_afford, hi_afford)
}
(false, true) => {
let mut lo_afford = in_range;
lo_afford.retain(|pt| pt[k] <= test + r_range.1);
lo_afford.extend(rhs.iter().filter(|pt| pt[k] <= test + r_range.1));
(lo_afford, Vec::new())
}
(true, false) => {
let mut hi_afford = in_range;
hi_afford.retain(|pt| test - r_range.1 <= pt[k]);
hi_afford.extend(
lhs.iter()
.filter(|pt| pt[k].is_finite() && test - r_range.1 <= pt[k]),
);
(Vec::new(), hi_afford)
}
(true, true) => (Vec::new(), Vec::new()),
};
let next_k = (k + 1) % K;
Self::new_help(
lhs,
tests,
aabbs,
afforded,
starts,
next_k,
2 * i + 1,
r_range,
lanes_log2,
lo_afford,
lo_vol,
)?;
Self::new_help(
rhs,
tests,
aabbs,
afforded,
starts,
next_k,
2 * i + 2,
r_range,
lanes_log2,
hi_afford,
hi_vol,
)?;
Ok(())
}
}
#[must_use]
pub fn collides(&self, center: &[A; K], mut radius: A) -> bool {
radius = radius + self.r_point;
let mut test_idx = 0;
let mut k = 0;
for _ in 0..self.tests.len().trailing_ones() {
test_idx = 2 * test_idx
+ 1
+ usize::from(unsafe { *self.tests.get_unchecked(test_idx) } <= center[k]);
k = (k + 1) % K;
}
let rsq = radius.square();
let i = test_idx - self.tests.len();
let aabb = unsafe { self.aabbs.get_unchecked(i) };
if aabb.closest_distsq_to(center) > rsq {
return false;
}
let mut range = unsafe {
self.starts[i].try_into().unwrap_unchecked()
..self.starts[i + 1].try_into().unwrap_unchecked()
};
range.any(|i| {
let aff_pt = array::from_fn(|k| self.afforded[k][i]);
distsq(aff_pt, *center) <= rsq
})
}
#[must_use]
#[doc(hidden)]
pub fn memory_used(&self) -> usize {
size_of::<Self>()
+ K * self.afforded.len() * size_of::<A>()
+ self.starts.len() * size_of::<I>()
+ self.tests.len() * size_of::<I>()
+ self.aabbs.len() * size_of::<Aabb<A, K>>()
}
#[must_use]
#[doc(hidden)]
#[allow(clippy::cast_precision_loss)]
pub fn affordance_size(&self) -> f64 {
self.afforded.len() as f64 / (self.tests.len() + 1) as f64
}
}
#[allow(clippy::mismatching_type_param_order)]
#[cfg(feature = "simd")]
impl<A, I, const K: usize> Capt<K, A, I>
where
I: IndexSimd,
A: Mul<Output = A>,
{
#[must_use]
pub fn collides_simd<const L: usize>(
&self,
centers: &[Simd<A, L>; K],
mut radii: Simd<A, L>,
) -> bool
where
Simd<A, L>: AxisSimd<L>,
A: AxisSimdElement,
{
assert!(L.is_power_of_two(), "lane count must be power of two");
assert!(
1 << self.lanes_log2 >= L,
"lane count of query must be lower than lane count of CAPT"
);
radii += Simd::splat(self.r_point);
let zs = forward_pass_simd(&self.tests, centers);
let mut inbounds = Mask::splat(true);
let mut aabb_ptrs = Simd::splat(self.aabbs.as_ptr()).wrapping_offset(zs).cast();
unsafe {
for center in centers {
inbounds &= Simd::<A, L>::cast_mask(
(Simd::gather_select_ptr(aabb_ptrs, inbounds, Simd::splat(A::NEG_INFINITY))
- radii)
.simd_le(*center),
);
aabb_ptrs = aabb_ptrs.wrapping_add(Simd::splat(1));
}
for center in centers {
inbounds &= Simd::<A, L>::cast_mask(
Simd::gather_select_ptr(aabb_ptrs, inbounds, Simd::splat(A::NEG_INFINITY))
.simd_ge(*center - radii),
);
aabb_ptrs = aabb_ptrs.wrapping_add(Simd::splat(1));
}
}
if !inbounds.any() {
return false;
}
let start_ptrs = Simd::splat(self.starts.as_ptr()).wrapping_offset(zs);
let starts = unsafe { I::to_simd_usize_unchecked(Simd::gather_ptr(start_ptrs)) }.to_array();
let ends = unsafe {
I::to_simd_usize_unchecked(Simd::gather_ptr(start_ptrs.wrapping_add(Simd::splat(1))))
}
.to_array();
starts
.into_iter()
.zip(ends)
.zip(inbounds.to_array())
.enumerate()
.filter_map(|(j, r)| r.1.then_some((j, r.0)))
.any(|(j, (start, end))| {
let mut n_center = [Simd::splat(A::ZERO); K];
for k in 0..K {
n_center[k] = Simd::splat(centers[k][j]);
}
let rs = Simd::splat(radii[j]);
let rs_sq = rs * rs;
(start..end).step_by(L).any(|i| {
let mut dists_sq = Simd::splat(A::ZERO);
#[allow(clippy::needless_range_loop)]
for k in 0..K {
let vals: Simd<A, L> = unsafe { *self.afforded[k].as_ptr().add(i).cast() };
let diff = vals - n_center[k];
dists_sq += diff * diff;
}
Simd::<A, L>::mask_any(dists_sq.simd_le(rs_sq))
})
})
}
}
fn distsq<A: Axis, const K: usize>(a: [A; K], b: [A; K]) -> A {
let mut total = A::ZERO;
for i in 0..K {
total = total + (a[i] - b[i]).square();
}
total
}
impl<A, const K: usize> Aabb<A, K>
where
A: Axis,
{
const ALL: Self = Self {
lo: [A::NEG_INFINITY; K],
hi: [A::INFINITY; K],
};
const fn split(mut self, test: A, dim: usize) -> (Self, Self) {
let mut rhs = self;
self.hi[dim] = test;
rhs.lo[dim] = test;
(self, rhs)
}
fn contained_by_ball(&self, center: &[A; K], rsq: A) -> bool {
let mut dist = A::ZERO;
#[allow(clippy::needless_range_loop)]
for k in 0..K {
let lo_diff = (self.lo[k] - center[k]).square();
let hi_diff = (self.hi[k] - center[k]).square();
dist = dist + if lo_diff < hi_diff { hi_diff } else { lo_diff };
}
dist <= rsq
}
#[doc(hidden)]
pub fn closest_distsq_to(&self, pt: &[A; K]) -> A {
let mut dist = A::ZERO;
#[allow(clippy::needless_range_loop)]
for d in 0..K {
let clamped = clamp(pt[d], self.lo[d], self.hi[d]);
dist = dist + (pt[d] - clamped).square();
}
dist
}
fn insert(&mut self, point: &[A; K]) {
self.lo
.iter_mut()
.zip(&mut self.hi)
.zip(point)
.for_each(|((l, h), &x)| {
if *l > x {
*l = x;
}
if x > *h {
*h = x;
}
});
}
}
#[inline]
unsafe fn median_partition<A: Axis, const K: usize>(points: &mut [[A; K]], k: usize) -> A {
unsafe {
let (lh, med_hi, _) = points.select_nth_unstable_by(points.len() / 2, |a, b| {
a[k].partial_cmp(&b[k]).unwrap_unchecked()
});
let med_lo = lh
.iter_mut()
.map(|p| p[k])
.max_by(|a, b| a.partial_cmp(b).unwrap_unchecked())
.unwrap();
A::in_between(med_lo, med_hi[k])
}
}
#[cfg(test)]
mod tests {
use rand::{Rng, SeedableRng, rngs::SmallRng};
use super::*;
#[test]
fn build_simple() {
let points = [[0.0, 0.1], [0.4, -0.2], [-0.2, -0.1]];
let t = Capt::<2>::new(&points, (0.0, 0.2), 1);
println!("{t:?}");
}
#[test]
fn exact_query_single() {
let points = [[0.0, 0.1], [0.4, -0.2], [-0.2, -0.1]];
let t = Capt::<2>::new(&points, (0.0, 0.2), 1);
println!("{t:?}");
let q0 = [0.0, -0.01];
assert!(t.collides(&q0, 0.12));
}
#[test]
fn another_one() {
let points = [[0.0, 0.1], [0.4, -0.2], [-0.2, -0.1]];
let t = Capt::<2>::new(&points, (0.0, 0.2), 1);
println!("{t:?}");
let q0 = [0.003_265_380_9, 0.106_527_805];
assert!(t.collides(&q0, 0.02));
}
#[test]
fn three_d() {
let points = [
[0.0; 3],
[0.1, -1.1, 0.5],
[-0.2, -0.3, 0.25],
[0.1, -1.1, 0.5],
];
let t = Capt::<3>::new(&points, (0.0, 0.2), 1);
println!("{t:?}");
assert!(t.collides(&[0.0, 0.1, 0.0], 0.11));
assert!(!t.collides(&[0.0, 0.1, 0.0], 0.05));
}
#[test]
fn fuzz() {
const R: f32 = 0.04;
let points = [[0.0, 0.1], [0.4, -0.2], [-0.2, -0.1]];
let mut rng = SmallRng::seed_from_u64(1234);
let t = Capt::<2>::new(&points, (0.0, R), 1);
for _ in 0..10_000 {
let p = [rng.random_range(-1.0..1.0), rng.random_range(-1.0..1.0)];
let collides = points.iter().any(|a| distsq(*a, p) <= R * R);
println!("{p:?}; {collides}");
assert_eq!(collides, t.collides(&p, R));
}
}
#[test]
fn weird_bounds() {
const R_SQ: f32 = 1.0;
let points = [
[-1.0, 0.0],
[0.001, 0.0],
[0.0, 0.5],
[-1.0, 10.0],
[-2.0, 10.0],
[-3.0, 10.0],
[-0.5, 0.0],
[-11.0, 1.0],
[-1.0, -0.5],
[1.0, 1.0],
[2.0, 2.0],
[3.0, 3.0],
[4.0, 4.0],
[5.0, 5.0],
[6.0, 6.0],
[7.0, 7.0],
];
let rsq_range = (R_SQ - f32::EPSILON, R_SQ + f32::EPSILON);
let t = Capt::<2>::new(&points, rsq_range, 1);
println!("{t:?}");
assert!(t.collides(&[-0.001, -0.2], 1.0));
}
#[test]
#[allow(clippy::float_cmp)]
fn does_it_partition() {
let mut points = vec![[1.0], [2.0], [1.5], [2.1], [-0.5]];
let median = unsafe { median_partition(&mut points, 0) };
assert_eq!(median, 1.25);
for p0 in &points[..points.len() / 2] {
assert!(p0[0] <= median);
}
for p0 in &points[points.len() / 2..] {
assert!(p0[0] >= median);
}
}
#[test]
fn point_radius() {
let points = [[0.0, 0.0], [0.0, 1.0]];
let r_range = (0.0, 1.0);
let capt: Capt<_, _, u32> = Capt::with_point_radius(&points, r_range, 0.5, 8);
assert!(capt.collides(&[0.6, 0.0], 0.2));
assert!(!capt.collides(&[0.6, 0.0], 0.05));
}
}