#![macro_use]
use std::array::from_fn;
macro_rules! create_simd {
($f:ident, $fx:ident, $vec3_ty:ident, $vec3_name:ident, $vec4_ty:ident, $quat_ty:ident, $quat_name:ident, $lanes:expr) => {
/// Structure-of-arrays bundle of 3D vectors: each field is one `$fx` value
/// holding that component for every lane.
#[derive(Clone, Copy, Debug)]
pub struct $vec3_ty {
/// X components, one per lane.
pub x: $fx,
/// Y components, one per lane.
pub y: $fx,
/// Z components, one per lane.
pub z: $fx,
}
/// Structure-of-arrays bundle of 4D vectors: each field is one `$fx` value
/// holding that component for every lane.
#[derive(Clone, Copy, Debug)]
pub struct $vec4_ty {
/// X components, one per lane.
pub x: $fx,
/// Y components, one per lane.
pub y: $fx,
/// Z components, one per lane.
pub z: $fx,
/// W components, one per lane.
pub w: $fx,
}
impl Neg for $vec3_ty {
    type Output = Self;

    /// Negates every component in every lane by subtracting it from a
    /// zero-filled `$fx`.
    fn neg(self) -> Self {
        let Self { x, y, z } = self;
        let zeros = $fx::splat(0.);
        Self {
            x: zeros - x,
            y: zeros - y,
            z: zeros - z,
        }
    }
}
impl Add<$f> for $vec3_ty {
    type Output = Self;

    /// Adds the scalar `rhs`, broadcast to every lane, by delegating to the
    /// `$fx` right-hand-side addition.
    fn add(self, rhs: $f) -> Self {
        self + $fx::splat(rhs)
    }
}
impl Sub<$f> for $vec3_ty {
    type Output = Self;

    /// Subtracts the scalar `rhs`, broadcast to every lane, by delegating to
    /// the `$fx` right-hand-side subtraction.
    fn sub(self, rhs: $f) -> Self {
        self - $fx::splat(rhs)
    }
}
impl Mul<$f> for $vec3_ty {
    type Output = Self;

    /// Multiplies by the scalar `rhs`, broadcast to every lane, by delegating
    /// to the `$fx` right-hand-side multiplication.
    fn mul(self, rhs: $f) -> Self {
        self * $fx::splat(rhs)
    }
}
impl Div<$f> for $vec3_ty {
    type Output = Self;

    /// Divides by the scalar `rhs`, broadcast to every lane, by delegating to
    /// the `$fx` right-hand-side division.
    fn div(self, rhs: $f) -> Self {
        self / $fx::splat(rhs)
    }
}
impl Neg for $vec4_ty {
    type Output = Self;

    /// Negates every component in every lane by subtracting it from a
    /// zero-filled `$fx`.
    fn neg(self) -> Self {
        let Self { x, y, z, w } = self;
        let zeros = $fx::splat(0.);
        Self {
            x: zeros - x,
            y: zeros - y,
            z: zeros - z,
            w: zeros - w,
        }
    }
}
impl Mul<$f> for $vec4_ty {
    type Output = Self;

    /// Multiplies by the scalar `rhs`, broadcast to every lane, by delegating
    /// to the `$fx` right-hand-side multiplication.
    fn mul(self, rhs: $f) -> Self {
        let factor = $fx::splat(rhs);
        self * factor
    }
}
impl $vec3_ty {
    /// Transposes `$lanes` vectors into per-component SIMD values: lane `i`
    /// of each field is taken from `arr[i]`.
    pub fn from_array(arr: [Vec3; $lanes]) -> Self {
        // The lane count is a compile-time constant, so gather the components
        // into stack arrays instead of heap-allocated `Vec`s.
        let x_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].x);
        let y_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].y);
        let z_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].z);
        Self {
            x: $fx::from_slice(&x_vals),
            y: $fx::from_slice(&y_vals),
            z: $fx::from_slice(&z_vals),
        }
    }

    /// Builds the bundle from a slice of vectors.
    ///
    /// Every element of `slice` is forwarded, component by component, to
    /// `$fx::from_slice`, so the accepted length is whatever that function
    /// accepts (presumably exactly `$lanes` elements; confirm against `$fx`).
    pub fn from_slice(slice: &[Vec3]) -> Self {
        let x_vals: Vec<_> = slice.iter().map(|v| v.x).collect();
        let y_vals: Vec<_> = slice.iter().map(|v| v.y).collect();
        let z_vals: Vec<_> = slice.iter().map(|v| v.z).collect();
        Self {
            x: $fx::from_slice(&x_vals),
            y: $fx::from_slice(&y_vals),
            z: $fx::from_slice(&z_vals),
        }
    }

    /// Transposes back to one `Vec3` per lane; element `i` of the result is
    /// built from lane `i` of `x`, `y` and `z`.
    pub fn to_array(self) -> [Vec3; $lanes] {
        let xs = self.x.to_array();
        let ys = self.y.to_array();
        let zs = self.z.to_array();
        ::std::array::from_fn(|i| Vec3 {
            x: xs[i],
            y: ys[i],
            z: zs[i],
        })
    }

    /// Returns a bundle whose components are all zero in every lane.
    pub fn new_zero() -> Self {
        let zero = $fx::splat(0.);
        Self {
            x: zero,
            y: zero,
            z: zero,
        }
    }

    /// Broadcasts a single `Vec3` to every lane.
    pub fn splat(val: Vec3) -> Self {
        Self {
            x: $fx::splat(val.x),
            y: $fx::splat(val.y),
            z: $fx::splat(val.z),
        }
    }
}
impl $vec4_ty {
    /// Transposes `$lanes` vectors into per-component SIMD values: lane `i`
    /// of each field is taken from `arr[i]`.
    pub fn from_array(arr: [Vec4; $lanes]) -> Self {
        // The lane count is a compile-time constant, so gather the components
        // into stack arrays instead of heap-allocated `Vec`s.
        let x_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].x);
        let y_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].y);
        let z_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].z);
        let w_vals: [_; $lanes] = ::std::array::from_fn(|i| arr[i].w);
        Self {
            x: $fx::from_slice(&x_vals),
            y: $fx::from_slice(&y_vals),
            z: $fx::from_slice(&z_vals),
            w: $fx::from_slice(&w_vals),
        }
    }

    /// Builds the bundle from a slice of vectors.
    ///
    /// Every element of `slice` is forwarded, component by component, to
    /// `$fx::from_slice`, so the accepted length is whatever that function
    /// accepts (presumably exactly `$lanes` elements; confirm against `$fx`).
    pub fn from_slice(slice: &[Vec4]) -> Self {
        let x_vals: Vec<_> = slice.iter().map(|v| v.x).collect();
        let y_vals: Vec<_> = slice.iter().map(|v| v.y).collect();
        let z_vals: Vec<_> = slice.iter().map(|v| v.z).collect();
        // The W lanes must come from `w`; this previously read `z` by mistake.
        let w_vals: Vec<_> = slice.iter().map(|v| v.w).collect();
        Self {
            x: $fx::from_slice(&x_vals),
            y: $fx::from_slice(&y_vals),
            z: $fx::from_slice(&z_vals),
            w: $fx::from_slice(&w_vals),
        }
    }

    /// Transposes back to one `Vec4` per lane; element `i` of the result is
    /// built from lane `i` of `x`, `y`, `z` and `w`.
    pub fn to_array(self) -> [Vec4; $lanes] {
        let xs = self.x.to_array();
        let ys = self.y.to_array();
        let zs = self.z.to_array();
        let ws = self.w.to_array();
        ::std::array::from_fn(|i| Vec4 {
            x: xs[i],
            y: ys[i],
            z: zs[i],
            w: ws[i],
        })
    }

    /// Returns a bundle whose components are all zero in every lane.
    pub fn new_zero() -> Self {
        let zero = $fx::splat(0.);
        Self {
            x: zero,
            y: zero,
            z: zero,
            w: zero,
        }
    }

    /// Broadcasts a single `Vec4` to every lane.
    pub fn splat(val: Vec4) -> Self {
        Self {
            x: $fx::splat(val.x),
            y: $fx::splat(val.y),
            z: $fx::splat(val.z),
            w: $fx::splat(val.w),
        }
    }
}
/// Structure-of-arrays bundle of quaternions: each field is one `$fx` value
/// holding that component for every lane.
#[derive(Clone, Copy, Debug)]
pub struct $quat_ty {
/// W components, one per lane.
pub w: $fx,
/// X components, one per lane.
pub x: $fx,
/// Y components, one per lane.
pub y: $fx,
/// Z components, one per lane.
pub z: $fx,
}
impl Mul<$f> for $quat_ty {
    type Output = Self;

    /// Scales all four components in every lane by the scalar `rhs`.
    fn mul(self, rhs: $f) -> Self {
        let Self { w, x, y, z } = self;
        let factor = $fx::splat(rhs);
        Self {
            w: w * factor,
            x: x * factor,
            y: y * factor,
            z: z * factor,
        }
    }
}
impl $quat_ty {
    /// Transposes `$lanes` quaternions into per-component SIMD values: lane
    /// `i` of each field is taken from `slots[i]`.
    pub fn from_array(slots: [Quaternion; $lanes]) -> Self {
        let ws: [_; $lanes] = ::std::array::from_fn(|i| slots[i].w);
        let xs: [_; $lanes] = ::std::array::from_fn(|i| slots[i].x);
        let ys: [_; $lanes] = ::std::array::from_fn(|i| slots[i].y);
        let zs: [_; $lanes] = ::std::array::from_fn(|i| slots[i].z);
        Self {
            w: $fx::from_slice(&ws),
            x: $fx::from_slice(&xs),
            y: $fx::from_slice(&ys),
            z: $fx::from_slice(&zs),
        }
    }

    /// Builds the bundle from the first `$lanes` quaternions of `slice`.
    ///
    /// # Panics
    ///
    /// Panics (index out of bounds) if `slice` has fewer than `$lanes`
    /// elements. Elements beyond the first `$lanes` are ignored.
    pub fn from_slice(slice: &[Quaternion]) -> Self {
        let ws: [_; $lanes] = ::std::array::from_fn(|i| slice[i].w);
        let xs: [_; $lanes] = ::std::array::from_fn(|i| slice[i].x);
        let ys: [_; $lanes] = ::std::array::from_fn(|i| slice[i].y);
        let zs: [_; $lanes] = ::std::array::from_fn(|i| slice[i].z);
        Self {
            w: $fx::from_slice(&ws),
            x: $fx::from_slice(&xs),
            y: $fx::from_slice(&ys),
            z: $fx::from_slice(&zs),
        }
    }

    /// Transposes back to one `Quaternion` per lane; element `i` of the
    /// result is built from lane `i` of `w`, `x`, `y` and `z`.
    pub fn to_array(self) -> [Quaternion; $lanes] {
        let ws = self.w.to_array();
        let xs = self.x.to_array();
        let ys = self.y.to_array();
        let zs = self.z.to_array();
        ::std::array::from_fn(|i| Quaternion {
            w: ws[i],
            x: xs[i],
            y: ys[i],
            z: zs[i],
        })
    }

    /// Returns the identity quaternion (`w = 1`, `x = y = z = 0`) in every lane.
    pub fn new_identity() -> Self {
        let zero = $fx::splat(0.);
        Self {
            w: $fx::splat(1.),
            x: zero,
            y: zero,
            z: zero,
        }
    }

    /// Broadcasts a single `Quaternion` to every lane.
    pub fn splat(val: Quaternion) -> Self {
        let Quaternion { w, x, y, z } = val;
        Self {
            w: $fx::splat(w),
            x: $fx::splat(x),
            y: $fx::splat(y),
            z: $fx::splat(z),
        }
    }

    /// Returns the `x`, `y`, `z` components as a `$vec3_ty`, dropping `w`.
    pub fn to_vec(self) -> $vec3_ty {
        let Self { x, y, z, .. } = self;
        $vec3_ty { x, y, z }
    }
}
paste::paste! {
/// Packs a slice of `Vec3` into SIMD bundles of `$lanes` lanes each.
///
/// A trailing partial group is padded with `Vec3::new_zero()`. Returns the
/// bundles together with the number of lanes in the final bundle that hold
/// input data; that count is `$lanes` when the input length is a multiple of
/// `$lanes` (including an empty input).
pub fn [<pack_ $vec3_name>](vecs: &[Vec3]) -> (Vec<$vec3_ty>, usize) {
    let remainder = vecs.len() % $lanes;
    let full_chunks = vecs.chunks_exact($lanes);
    let tail = full_chunks.remainder();

    let mut data: Vec<$vec3_ty> =
        Vec::with_capacity(vecs.len() / $lanes + usize::from(remainder != 0));
    // Full chunks are converted straight from the input; no padded copy of the
    // whole slice is needed.
    data.extend(full_chunks.map(|chunk| {
        let arr: [Vec3; $lanes] = chunk.try_into().unwrap();
        $vec3_ty::from_array(arr)
    }));
    // Only the final partial group needs padding.
    if remainder != 0 {
        let mut last = [Vec3::new_zero(); $lanes];
        last[..remainder].copy_from_slice(tail);
        data.push($vec3_ty::from_array(last));
    }

    let valid_lanes_last_chunk = if remainder == 0 { $lanes } else { remainder };
    (data, valid_lanes_last_chunk)
}
/// Flattens SIMD bundles back into a `Vec<Vec3>`, lane by lane and bundle by
/// bundle, keeping only the first `len_orig` values so padding lanes at the
/// end are dropped.
///
/// The result never exceeds `len_orig` elements; in particular
/// `len_orig == 0` yields an empty vector even if `vals` is non-empty.
pub fn [<unpack_ $vec3_name>](vals: &[$vec3_ty], len_orig: usize) -> Vec<Vec3> {
    let available = vals.len().saturating_mul($lanes);
    let mut result = Vec::with_capacity(len_orig.min(available));
    result.extend(vals.iter().flat_map(|v| v.to_array()).take(len_orig));
    result
}
/// Packs a slice of `Quaternion` into SIMD bundles of `$lanes` lanes each.
///
/// A trailing partial group is padded with `Quaternion::new_identity()`.
/// Returns the bundles together with the number of lanes in the final bundle
/// that hold input data; that count is `$lanes` when the input length is a
/// multiple of `$lanes` (including an empty input).
pub fn [<pack_ $quat_name>](vals: &[Quaternion]) -> (Vec<$quat_ty>, usize) {
    let remainder = vals.len() % $lanes;
    let full_chunks = vals.chunks_exact($lanes);
    let tail = full_chunks.remainder();

    let mut data: Vec<$quat_ty> =
        Vec::with_capacity(vals.len() / $lanes + usize::from(remainder != 0));
    // Full chunks are converted straight from the input; no padded copy of the
    // whole slice is needed.
    data.extend(full_chunks.map(|chunk| {
        let arr: [Quaternion; $lanes] = chunk.try_into().unwrap();
        $quat_ty::from_array(arr)
    }));
    // Only the final partial group needs padding.
    if remainder != 0 {
        let mut last = [Quaternion::new_identity(); $lanes];
        last[..remainder].copy_from_slice(tail);
        data.push($quat_ty::from_array(last));
    }

    let valid_lanes_last_chunk = if remainder == 0 { $lanes } else { remainder };
    (data, valid_lanes_last_chunk)
}
/// Flattens SIMD bundles back into a `Vec<Quaternion>`, lane by lane and
/// bundle by bundle, keeping only the first `len_orig` values so padding
/// lanes at the end are dropped.
///
/// The result never exceeds `len_orig` elements; in particular
/// `len_orig == 0` yields an empty vector even if `vals` is non-empty.
pub fn [<unpack_ $quat_name>](vals: &[$quat_ty], len_orig: usize) -> Vec<Quaternion> {
    let available = vals.len().saturating_mul($lanes);
    let mut result = Vec::with_capacity(len_orig.min(available));
    result.extend(vals.iter().flat_map(|v| v.to_array()).take(len_orig));
    result
}
/// Packs a slice of `$f` scalars into `$fx` values of `$lanes` lanes each.
///
/// A trailing partial group is padded with `0.`. Returns the packed values
/// together with the number of lanes in the final value that hold input
/// data; that count is `$lanes` when the input length is a multiple of
/// `$lanes` (including an empty input).
pub fn [<pack_x $lanes>](vals: &[$f]) -> (Vec<$fx>, usize) {
    let remainder = vals.len() % $lanes;
    let full_chunks = vals.chunks_exact($lanes);
    let tail = full_chunks.remainder();

    let mut data: Vec<$fx> =
        Vec::with_capacity(vals.len() / $lanes + usize::from(remainder != 0));
    // NOTE(review): `load` takes a raw pointer and presumably reads `$lanes`
    // values from it; every buffer passed here holds exactly that many.
    data.extend(full_chunks.map(|chunk| $fx::load(chunk.as_ptr())));
    // Only the final partial group needs padding.
    if remainder != 0 {
        let mut last: [$f; $lanes] = [0.; $lanes];
        last[..remainder].copy_from_slice(tail);
        data.push($fx::load(last.as_ptr()));
    }

    let valid_lanes_last_chunk = if remainder == 0 { $lanes } else { remainder };
    (data, valid_lanes_last_chunk)
}
/// Flattens `$fx` values back into a `Vec<$f>`, lane by lane and value by
/// value, keeping only the first `len_orig` scalars so padding lanes at the
/// end are dropped.
///
/// The result never exceeds `len_orig` elements; in particular
/// `len_orig == 0` yields an empty vector even if `vals` is non-empty.
pub fn [<unpack_x $lanes>](vals: &[$fx], len_orig: usize) -> Vec<$f> {
    let available = vals.len().saturating_mul($lanes);
    let mut result = Vec::with_capacity(len_orig.min(available));
    result.extend(vals.iter().flat_map(|v| v.to_array()).take(len_orig));
    result
}
}
};
}
/// Splits `vals` into fixed-size arrays of `LANES` elements.
///
/// A trailing partial group is padded with `T::default()`. Returns the arrays
/// together with the number of elements in the final array that came from
/// `vals`; that count is `LANES` when `vals.len()` is a multiple of `LANES`
/// (including an empty input).
///
/// # Panics
///
/// Panics if `LANES` is zero.
pub fn pack_slice<T, const LANES: usize>(vals: &[T]) -> (Vec<[T; LANES]>, usize)
where
    T: Copy + Clone + Default,
{
    let remainder = vals.len() % LANES;
    let full_chunks = vals.chunks_exact(LANES);
    let tail = full_chunks.remainder();

    let mut data: Vec<[T; LANES]> =
        Vec::with_capacity(vals.len() / LANES + usize::from(remainder != 0));
    // Full chunks are copied straight from the input; no padded copy of the
    // whole slice is needed.
    data.extend(full_chunks.map(|chunk| {
        let mut arr = [T::default(); LANES];
        arr.copy_from_slice(chunk);
        arr
    }));
    // Only the final partial group needs padding; its unused slots keep the
    // default value.
    if remainder != 0 {
        let mut arr = [T::default(); LANES];
        arr[..remainder].copy_from_slice(tail);
        data.push(arr);
    }

    let valid_lanes_last_chunk = if remainder == 0 { LANES } else { remainder };
    (data, valid_lanes_last_chunk)
}
/// Splits `vals` into fixed-size arrays of `LANES` elements for types that
/// are `Clone` but not `Copy`.
///
/// A trailing partial group is padded with `T::default()`. Each input element
/// is cloned exactly once. Returns the arrays together with the number of
/// elements in the final array that came from `vals`; that count is `LANES`
/// when `vals.len()` is a multiple of `LANES` (including an empty input).
///
/// # Panics
///
/// Panics if `LANES` is zero.
pub fn pack_slice_noncopy<T, const LANES: usize>(vals: &[T]) -> (Vec<[T; LANES]>, usize)
where
    T: Clone + Default,
{
    let remainder = vals.len() % LANES;
    let valid_lanes_last = if remainder == 0 { LANES } else { remainder };

    // `chunks` yields a shorter final chunk when the length is not a multiple
    // of `LANES`; slots past its end fall back to `T::default()`.
    let data: Vec<[T; LANES]> = vals
        .chunks(LANES)
        .map(|chunk| from_fn(|i| chunk.get(i).cloned().unwrap_or_default()))
        .collect();

    (data, valid_lanes_last)
}
/// Flattens fixed-size arrays back into a `Vec<T>`, keeping only the first
/// `len_orig` elements so padding at the end of the final array is dropped.
///
/// The result never exceeds `len_orig` elements; in particular
/// `len_orig == 0` yields an empty vector even if `vals` is non-empty. If
/// `vals` holds fewer than `len_orig` elements in total, all of them are
/// returned.
pub fn unpack_slice<T: Copy, const LANES: usize>(vals: &[[T; LANES]], len_orig: usize) -> Vec<T> {
    let available = vals.len().saturating_mul(LANES);
    let mut result = Vec::with_capacity(len_orig.min(available));
    result.extend(vals.iter().flatten().copied().take(len_orig));
    result
}