use core::iter::{Product, Sum};
use core::mem::{self, MaybeUninit};
use core::ops::*;
use core::ptr;
use generic_array::{ArrayLength, GenericArray};
use typenum::marker_traits::Unsigned;
use crate::{inner, Mask, Vector, Vectorizable};
// Generates the lane-wise implementation of one binary operator trait (`$tr`, method `$meth`)
// and of its compound-assignment counterpart (`$tr_assign`, method `$meth_assign`) for the
// vector type `$name`.
macro_rules! bin_op_impl {
    ($name: ident, $tr: ident, $meth: ident, $tr_assign: ident, $meth_assign: ident) => {
        // Value-returning form: every output lane is the operator applied to the matching
        // lanes of both operands.
        impl<B, S> $tr for $name<B, S>
        where
            B: inner::Repr + $tr<Output = B> + Copy,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Output = Self;

            #[inline]
            fn $meth(self, rhs: Self) -> Self {
                let mut lanes = MaybeUninit::<GenericArray<B, S>>::uninit();
                // Pointer to the first lane of the (still uninitialized) output array.
                let out = lanes.as_mut_ptr().cast::<B>();
                for lane in 0..S::USIZE {
                    let value = $tr::$meth(self.content[lane], rhs.content[lane]);
                    // SAFETY: `lane < S::USIZE`; this relies on `GenericArray<B, S>` storing
                    // `S::USIZE` contiguous values of `B`, so the write stays inside `lanes`.
                    unsafe { out.add(lane).write(value) };
                }
                Self {
                    // SAFETY: the loop above has written every one of the `S::USIZE` lanes.
                    content: unsafe { lanes.assume_init() },
                }
            }
        }

        // In-place form: every lane of `self` is updated with the matching lane of `rhs`.
        impl<B, S> $tr_assign for $name<B, S>
        where
            B: inner::Repr + $tr_assign + Copy,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            #[inline]
            fn $meth_assign(&mut self, rhs: Self) {
                (0..S::USIZE).for_each(|lane| {
                    $tr_assign::$meth_assign(&mut self.content[lane], rhs.content[lane]);
                });
            }
        }
    };
}
// Generates the lane-wise implementation of one unary operator trait (`$tr`, method `$meth`)
// for the vector type `$name`.
macro_rules! una_op_impl {
    ($name: ident, $tr: ident, $meth: ident) => {
        impl<B, S> $tr for $name<B, S>
        where
            B: inner::Repr + $tr<Output = B> + Copy,
            S: Unsigned + ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Output = Self;

            #[inline]
            fn $meth(self) -> Self {
                let mut lanes = MaybeUninit::<GenericArray<B, S>>::uninit();
                // Pointer to the first lane of the (still uninitialized) output array.
                let out = lanes.as_mut_ptr().cast::<B>();
                for lane in 0..S::USIZE {
                    let value = $tr::$meth(self.content[lane]);
                    // SAFETY: `lane < S::USIZE`; this relies on `GenericArray<B, S>` storing
                    // `S::USIZE` contiguous values of `B`, so the write stays inside `lanes`.
                    unsafe { out.add(lane).write(value) };
                }
                Self {
                    // SAFETY: the loop above has written every one of the `S::USIZE` lanes.
                    content: unsafe { lanes.assume_init() },
                }
            }
        }
    };
}
// Generates one lane-wise comparison method named `$op`, available when the base type
// implements `$tr`. It is meant to be expanded inside an impl where `B`, `S` and `Self::Mask`
// are in scope (see its use in `vector_impl!`).
macro_rules! cmp_op {
    ($tr: ident, $op: ident) => {
        /// Compares the two vectors lane by lane and returns a mask vector holding the outcome
        /// of the comparison for each lane.
        #[inline]
        fn $op(self, other: Self) -> Self::Mask
        where
            Self::Base: $tr,
        {
            let mut lanes = MaybeUninit::<GenericArray<B::Mask, S>>::uninit();
            // Pointer to the first lane of the (still uninitialized) mask array.
            let out = lanes.as_mut_ptr().cast::<B::Mask>();
            for lane in 0..S::USIZE {
                let verdict = self.content[lane].$op(&other.content[lane]);
                // SAFETY: `lane < S::USIZE`; this relies on `GenericArray<B::Mask, S>` storing
                // `S::USIZE` contiguous mask values, so the write stays inside `lanes`.
                unsafe { out.add(lane).write(B::Mask::from_bool(verdict)) };
            }
            Self::Mask {
                // SAFETY: the loop above has written every one of the `S::USIZE` lanes.
                content: unsafe { lanes.assume_init() },
            }
        }
    };
}
// Generates one packed vector type called `$name`, aligned to `$align` bytes, together with
// all of its trait implementations: `Vector`, slice-like access (`Deref`, `AsRef`, `Index`, ...),
// `Sum`/`Product`, `Vectorizable` for slices of the type, and the lane-wise operators.
macro_rules! vector_impl {
    ($name: ident, $align: expr) => {
        /// A vector of `S` lanes of the base type `B`.
        ///
        /// The lanes are stored in a `GenericArray<B, S>`; the type carries the alignment
        /// that was passed to the generating macro.
        #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
        #[repr(C, align($align))]
        pub struct $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            content: GenericArray<B, S>,
        }

        impl<B, S> Vector for $name<B, S>
        where
            B: inner::Repr + 'static,
            S: ArrayLength<B> + ArrayLength<B::Mask> + 'static,
            <S as ArrayLength<B>>::ArrayType: Copy,
            <S as ArrayLength<B::Mask>>::ArrayType: Copy,
        {
            type Base = B;
            type Lanes = S;
            // The mask vector is the same container holding the base type's mask type.
            type Mask = $name<B::Mask, S>;

            /// Builds the vector by reading a whole `GenericArray<B, S>` from `input`.
            ///
            /// # Safety
            ///
            /// `input` must satisfy the requirements of `ptr::read` for a
            /// `GenericArray<B, S>`; nothing about the pointer is checked here.
            ///
            /// # Panics
            ///
            /// If the vector type is not smaller than `isize::MAX` bytes.
            #[inline]
            unsafe fn new_unchecked(input: *const B) -> Self {
                assert!(
                    isize::MAX as usize > mem::size_of::<Self>(),
                    "Vector type too huge",
                );
                Self {
                    content: ptr::read(input.cast()),
                }
            }

            /// Builds a vector with every lane set to `value`.
            ///
            /// # Panics
            ///
            /// If the vector type is not smaller than `isize::MAX` bytes.
            #[inline]
            fn splat(value: B) -> Self {
                // Checked up front because the lane pointers below are formed with `ptr::add`.
                assert!(
                    isize::MAX as usize > mem::size_of::<Self>(),
                    "Vector type too huge",
                );
                let mut result = MaybeUninit::<GenericArray<B, S>>::uninit();
                // SAFETY: every `i` is below `S::USIZE`, so each write lands on a lane of
                // `result` (relies on `GenericArray<B, S>` being `S::USIZE` contiguous `B`s),
                // and all lanes are written before `assume_init`.
                unsafe {
                    for i in 0..S::USIZE {
                        ptr::write(result.as_mut_ptr().cast::<B>().add(i), value);
                    }
                    Self {
                        content: result.assume_init(),
                    }
                }
            }

            /// Loads lane `i` from `input[idx[i]]`.
            ///
            /// # Panics
            ///
            /// * If `idx` does not contain exactly one index per lane.
            /// * If any index is out of bounds of `input`.
            /// * If the vector type is not smaller than `isize::MAX` bytes.
            #[inline]
            fn gather_load<I, Idx>(input: I, idx: Idx) -> Self
            where
                I: AsRef<[B]>,
                Idx: AsRef<[usize]>,
            {
                let input = input.as_ref();
                let idx = idx.as_ref();
                assert!(
                    isize::MAX as usize > mem::size_of::<Self>(),
                    "Vector type too huge",
                );
                assert_eq!(
                    Self::LANES,
                    idx.len(),
                    "Gathering vector from wrong number of indexes"
                );
                // All indices are validated once here so the loop can skip bounds checks.
                assert!(idx.iter().all(|&l| l < input.len()), "Gather out of bounds");
                let mut result = MaybeUninit::<GenericArray<B, S>>::uninit();
                // SAFETY: `idx` has exactly `Self::LANES` entries and each of them was checked
                // to be inside `input` above; every lane of `result` is written before
                // `assume_init`.
                unsafe {
                    for i in 0..Self::LANES {
                        let idx = *idx.get_unchecked(i);
                        let input = *input.get_unchecked(idx);
                        ptr::write(result.as_mut_ptr().cast::<B>().add(i), input);
                    }
                    Self {
                        content: result.assume_init(),
                    }
                }
            }

            /// Like `gather_load`, but only lanes enabled in `mask` are loaded; the other
            /// lanes keep the value they have in `self`.
            ///
            /// Indices of disabled lanes are never looked at.
            ///
            /// # Panics
            ///
            /// * If `idx` or `mask` do not have exactly one entry per lane.
            /// * If the index of an enabled lane is out of bounds of `input`.
            #[inline]
            fn gather_load_masked<I, Idx, M, MB>(mut self, input: I, idx: Idx, mask: M) -> Self
            where
                I: AsRef<[B]>,
                Idx: AsRef<[usize]>,
                M: AsRef<[MB]>,
                MB: Mask,
            {
                let input = input.as_ref();
                let idx = idx.as_ref();
                let mask = mask.as_ref();
                let len = idx.len();
                assert_eq!(
                    Self::LANES,
                    len,
                    "Gathering vector from wrong number of indexes"
                );
                assert_eq!(Self::LANES, mask.len(), "Gathering with wrong sized mask");
                for i in 0..Self::LANES {
                    // SAFETY: `mask` and `idx` were both asserted to hold `Self::LANES`
                    // entries, so `i` is in bounds for the unchecked reads. The access into
                    // `input` stays bounds-checked.
                    unsafe {
                        if mask.get_unchecked(i).bool() {
                            let idx = *idx.get_unchecked(i);
                            self[i] = input[idx];
                        }
                    }
                }
                self
            }

            /// Stores lane `i` into `output[idx[i]]`.
            ///
            /// All indices are validated before anything is written, so a failing call does
            /// not leave `output` partially modified.
            ///
            /// # Panics
            ///
            /// * If `idx` does not contain exactly one index per lane.
            /// * If any index is out of bounds of `output`.
            #[inline]
            fn scatter_store<O, Idx>(self, mut output: O, idx: Idx)
            where
                O: AsMut<[B]>,
                Idx: AsRef<[usize]>,
            {
                let output = output.as_mut();
                let idx = idx.as_ref();
                assert_eq!(
                    Self::LANES,
                    idx.len(),
                    "Scattering vector to wrong number of indexes"
                );
                assert!(
                    idx.iter().all(|&l| l < output.len()),
                    "Scatter out of bounds"
                );
                for i in 0..Self::LANES {
                    // SAFETY: `idx` has `Self::LANES` entries and every one of them was
                    // checked to be inside `output` above.
                    unsafe {
                        let idx = *idx.get_unchecked(i);
                        *output.get_unchecked_mut(idx) = self[i];
                    }
                }
            }

            /// Like `scatter_store`, but only lanes enabled in `mask` are written.
            ///
            /// Indices of disabled lanes are not validated. The enabled ones are all
            /// validated before anything is written.
            ///
            /// # Panics
            ///
            /// * If `idx` or `mask` do not have exactly one entry per lane.
            /// * If the index of an enabled lane is out of bounds of `output`.
            #[inline]
            fn scatter_store_masked<O, Idx, M, MB>(self, mut output: O, idx: Idx, mask: M)
            where
                O: AsMut<[B]>,
                Idx: AsRef<[usize]>,
                M: AsRef<[MB]>,
                MB: Mask,
            {
                let output = output.as_mut();
                let idx = idx.as_ref();
                let mask = mask.as_ref();
                assert_eq!(
                    Self::LANES,
                    idx.len(),
                    "Scattering vector to wrong number of indexes"
                );
                assert_eq!(
                    Self::LANES,
                    mask.len(),
                    "Scattering vector with wrong sized mask"
                );
                // Validate every enabled lane first, so there is never a partial scatter.
                let in_bounds = idx
                    .iter()
                    .enumerate()
                    .all(|(i, &l)| {
                        !mask[i].bool() || l < output.len()
                    });
                assert!(in_bounds, "Scatter out of bounds");
                for i in 0..Self::LANES {
                    if mask[i].bool() {
                        // SAFETY: `idx` has `Self::LANES` entries and the index of every
                        // enabled lane was checked above.
                        // NOTE(review): this assumes `Mask::bool` answers the same for the
                        // same value on both calls — confirm `Mask` cannot be implemented
                        // outside this crate.
                        unsafe {
                            let idx = *idx.get_unchecked(i);
                            *output.get_unchecked_mut(idx) = self[i];
                        }
                    }
                }
            }

            /// Combines two vectors: lanes enabled in `mask` come from `other`, the
            /// remaining lanes come from `self`.
            ///
            /// # Panics
            ///
            /// If `mask` has fewer entries than the vector has lanes (plain slice indexing).
            /// Entries beyond the lane count are ignored.
            #[inline]
            fn blend<M, MB>(self, other: Self, mask: M) -> Self
            where
                M: AsRef<[MB]>,
                MB: Mask,
            {
                let mut result = MaybeUninit::<GenericArray<B, S>>::uninit();
                let mask = mask.as_ref();
                // SAFETY: every `i` is below `Self::LANES`, so each write lands on a lane of
                // `result`, and all lanes are written before `assume_init`. The accesses to
                // `mask`, `self` and `other` are bounds-checked.
                unsafe {
                    for i in 0..Self::LANES {
                        ptr::write(
                            result.as_mut_ptr().cast::<B>().add(i),
                            if mask[i].bool() { other[i] } else { self[i] },
                        );
                    }
                    Self {
                        content: result.assume_init(),
                    }
                }
            }

            /// Adds all lanes together.
            ///
            /// The lanes are combined pairwise (the two halves are summed recursively and
            /// then added), not left to right.
            ///
            /// # Panics
            ///
            /// If the vector has no lanes.
            #[inline]
            fn horizontal_sum(self) -> B
            where
                B: Add<Output = B>,
            {
                #[inline(always)]
                fn inner<B: Copy + Add<Output = B>>(d: &[B]) -> B {
                    if d.len() == 1 {
                        d[0]
                    } else {
                        let mid = d.len() / 2;
                        inner(&d[..mid]) + inner(&d[mid..])
                    }
                }
                // `inner` only terminates on one-element slices; an empty one would keep
                // splitting into empty halves until the stack overflows.
                assert!(Self::LANES > 0, "Horizontal sum of a vector with no lanes");
                inner(&self.content)
            }

            /// Multiplies all lanes together.
            ///
            /// The lanes are combined pairwise (the two halves are multiplied recursively
            /// and then multiplied together), not left to right.
            ///
            /// # Panics
            ///
            /// If the vector has no lanes.
            #[inline]
            fn horizontal_product(self) -> B
            where
                B: Mul<Output = B>,
            {
                #[inline(always)]
                fn inner<B: Copy + Mul<Output = B>>(d: &[B]) -> B {
                    if d.len() == 1 {
                        d[0]
                    } else {
                        let mid = d.len() / 2;
                        inner(&d[..mid]) * inner(&d[mid..])
                    }
                }
                // `inner` only terminates on one-element slices; an empty one would keep
                // splitting into empty halves until the stack overflows.
                assert!(
                    Self::LANES > 0,
                    "Horizontal product of a vector with no lanes"
                );
                inner(&self.content)
            }

            // Lane-wise comparisons, each returning a mask vector.
            cmp_op!(PartialEq, eq);
            cmp_op!(PartialOrd, lt);
            cmp_op!(PartialOrd, gt);
            cmp_op!(PartialOrd, le);
            cmp_op!(PartialOrd, ge);
        }

        // Read-only view of the lanes as a slice.
        impl<B, S> Deref for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Target = [B];
            #[inline]
            fn deref(&self) -> &[B] {
                &self.content
            }
        }

        // Mutable view of the lanes as a slice.
        impl<B, S> DerefMut for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            #[inline]
            fn deref_mut(&mut self) -> &mut [B] {
                &mut self.content
            }
        }

        // Lets a vector be passed wherever `AsRef<[B]>` is accepted (for example as the
        // input of a gather).
        impl<B, S> AsRef<[B]> for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            #[inline]
            fn as_ref(&self) -> &[B] {
                &self.content
            }
        }

        // Lets a vector be passed wherever `AsMut<[B]>` is accepted (for example as the
        // output of a scatter).
        impl<B, S> AsMut<[B]> for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            #[inline]
            fn as_mut(&mut self) -> &mut [B] {
                &mut self.content
            }
        }

        // Bounds-checked read access to a single lane.
        impl<B, S> Index<usize> for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Output = B;
            #[inline]
            fn index(&self, idx: usize) -> &B {
                &self.content[idx]
            }
        }

        // Bounds-checked write access to a single lane.
        impl<B, S> IndexMut<usize> for $name<B, S>
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            #[inline]
            fn index_mut(&mut self, idx: usize) -> &mut B {
                &mut self.content[idx]
            }
        }

        // Lane-wise sum of an iterator of vectors, starting from `Self::default()`.
        impl<B, S> Sum for $name<B, S>
        where
            B: inner::Repr + AddAssign,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
            Self: Default,
        {
            #[inline]
            fn sum<I>(iter: I) -> Self
            where
                I: Iterator<Item = Self>,
            {
                let mut result = Self::default();
                for i in iter {
                    result += i;
                }
                result
            }
        }

        // Lane-wise product of an iterator of vectors, starting from a vector with every
        // lane set to `B::ONE`.
        impl<B, S> Product for $name<B, S>
        where
            B: inner::Repr + MulAssign,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
            Self: Vector<Base = B, Lanes = S>,
            <Self as Vector>::Mask: AsRef<[B::Mask]>,
        {
            #[inline]
            fn product<I>(iter: I) -> Self
            where
                I: Iterator<Item = Self>,
            {
                let mut result = Self::splat(B::ONE);
                for i in iter {
                    result *= i;
                }
                result
            }
        }

        // A shared slice of vectors is already in vector form: it is handed back unchanged
        // together with its length, needs no padding value and yields no extra element.
        // NOTE(review): the meaning of the three tuple fields is defined by the
        // `Vectorizable` trait, which is not visible here — confirm against its docs.
        impl<'a, B, S> Vectorizable<$name<B, S>> for &'a [$name<B, S>]
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Padding = ();
            type Vectorizer = &'a [$name<B, S>];
            fn create(self, _pad: Option<()>) -> (Self::Vectorizer, usize, Option<$name<B, S>>) {
                (self, self.len(), None)
            }
        }

        // Same as above for a mutable slice of vectors, producing mutable references.
        impl<'a, B, S> Vectorizable<&'a mut $name<B, S>> for &'a mut [$name<B, S>]
        where
            B: inner::Repr,
            S: ArrayLength<B>,
            S::ArrayType: Copy,
        {
            type Padding = ();
            type Vectorizer = &'a mut [$name<B, S>];
            fn create(
                self,
                _pad: Option<()>,
            ) -> (Self::Vectorizer, usize, Option<&'a mut $name<B, S>>) {
                // The length is read first because `self` is moved into the tuple.
                let len = self.len();
                (self, len, None)
            }
        }

        // Lane-wise arithmetic, bitwise and shift operators with their assigning forms.
        bin_op_impl!($name, Add, add, AddAssign, add_assign);
        bin_op_impl!($name, Sub, sub, SubAssign, sub_assign);
        bin_op_impl!($name, Mul, mul, MulAssign, mul_assign);
        bin_op_impl!($name, Div, div, DivAssign, div_assign);
        bin_op_impl!($name, Rem, rem, RemAssign, rem_assign);
        bin_op_impl!($name, BitAnd, bitand, BitAndAssign, bitand_assign);
        bin_op_impl!($name, BitOr, bitor, BitOrAssign, bitor_assign);
        bin_op_impl!($name, BitXor, bitxor, BitXorAssign, bitxor_assign);
        bin_op_impl!($name, Shl, shl, ShlAssign, shl_assign);
        bin_op_impl!($name, Shr, shr, ShrAssign, shr_assign);

        // Lane-wise unary operators.
        una_op_impl!($name, Neg, neg);
        una_op_impl!($name, Not, not);
    };
}
// Instantiate the packed vector types. The second argument is the alignment in bytes that
// `vector_impl!` places into the generated struct's `#[repr(C, align(..))]` attribute.
vector_impl!(Packed1, 1);
vector_impl!(Packed2, 2);
vector_impl!(Packed4, 4);
vector_impl!(Packed8, 8);
vector_impl!(Packed16, 16);
vector_impl!(Packed32, 32);
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prelude::*;

    /// Vector type exercised by the construction, gather and scatter tests.
    type V = u16x4;

    /// Shorthands for building expected mask vectors.
    const T: m32 = m32::TRUE;
    const F: m32 = m32::FALSE;

    /// Constructing from a slice of the wrong length must panic with a descriptive message.
    #[test]
    #[should_panic(expected = "Creating vector from the wrong sized slice (expected 4, got 3)")]
    fn wrong_size_new() {
        V::new([1, 2, 3]);
    }

    /// A gather that uses another vector as its input acts as a lane shuffle.
    #[test]
    fn shuffle() {
        let ascending = V::new([1, 2, 3, 4]);
        let permuted = V::gather_load(ascending, [3, 1, 2, 0]);
        assert_eq!(permuted.deref(), &[4, 2, 3, 1]);
        let duplicated = V::gather_load(permuted, [0, 0, 2, 2]);
        assert_eq!(duplicated.deref(), &[4, 4, 3, 3]);
    }

    /// Gathering picks the requested positions out of a longer input.
    #[test]
    fn gather() {
        let source: Vec<_> = (1..=10).collect();
        let picked = V::gather_load(&source, [0, 2, 4, 6]);
        assert_eq!(picked.deref(), [1, 3, 5, 7]);
    }

    /// Scattering writes each lane to its index and leaves every other slot untouched.
    #[test]
    fn scatter() {
        let mut sink = [0; 10];
        let lanes = V::new([1, 2, 3, 4]);
        lanes.scatter_store(&mut sink, [1, 3, 5, 7]);
        assert_eq!(sink, [0, 1, 0, 2, 0, 3, 0, 4, 0, 0]);
    }

    /// An index past the end of the gather input is rejected.
    #[test]
    #[should_panic(expected = "Gather out of bounds")]
    fn gather_oob() {
        V::gather_load([1, 2, 3], [0, 1, 2, 3]);
    }

    /// A gather needs exactly one index per lane.
    #[test]
    #[should_panic(expected = "Gathering vector from wrong number of indexes")]
    fn gather_idx_cnt() {
        V::gather_load([0, 1, 2, 3, 4], [0, 1]);
    }

    /// An index past the end of the scatter output is rejected.
    #[test]
    #[should_panic(expected = "Scatter out of bounds")]
    fn scatter_oob() {
        let mut sink = [0; 10];
        V::new([1, 2, 3, 4]).scatter_store(&mut sink, [0, 1, 2, 15]);
    }

    /// A scatter needs exactly one index per lane.
    #[test]
    #[should_panic(expected = "Scattering vector to wrong number of indexes")]
    fn scatter_idx_cnt() {
        let mut sink = [0; 10];
        V::new([1, 2, 3, 4]).scatter_store(&mut sink, [0, 1, 2]);
    }

    /// Lane-wise comparisons produce the expected mask vectors.
    #[test]
    fn cmp() {
        let lhs = u32x4::new([1, 3, 5, 7]);
        let rhs = u32x4::new([2, 3, 4, 5]);
        assert_eq!(lhs.eq(rhs), m32x4::new([F, T, F, F]));
        assert_eq!(lhs.le(rhs), m32x4::new([T, T, F, F]));
        assert_eq!(lhs.ge(rhs), m32x4::new([F, T, T, T]));
    }

    /// Blending takes enabled lanes from the argument, and accepts either a mask vector or
    /// plain `bool`s as the mask.
    #[test]
    fn blend() {
        let base = u32x4::new([1, 2, 3, 4]);
        let overlay = u32x4::new([5, 6, 7, 8]);
        let via_mask_vector = base.blend(overlay, m32x4::new([F, T, F, T]));
        assert_eq!(via_mask_vector, u32x4::new([1, 6, 3, 8]));
        let via_bools = base.blend(overlay, [false, true, false, true]);
        assert_eq!(via_mask_vector, via_bools);
    }
}