use core::mem::size_of;
use core::ops::Not;
use super::types::Vector;
pub use core::intrinsics::simd::*;
/// Carrier for a compile-time constant of type `T`.
///
/// Implementors expose the value through the associated constant [`Imm::VAL`].
/// NOTE(review): the name suggests an "immediate" operand — confirm against implementors.
pub trait Imm<T> {
/// The constant value supplied by the implementing type.
const VAL: T;
}
/// Builds a vector from `T::splat(T::Elem::default())` and then replaces
/// lane 0 with `value`.
///
/// # Safety
///
/// Forwards to the `simd_insert` intrinsic; the caller must ensure `T` is a
/// type that intrinsic accepts with `T::Elem` as its lane type.
#[inline(always)]
pub unsafe fn simd_set_first<T: Vector>(value: T::Elem) -> T
where
    T::Elem: Default,
{
    let base: T = T::splat(<T::Elem as Default>::default());
    simd_insert(base, 0, value)
}
/// Lane-wise complement of `x`.
///
/// Implemented as `x ^ mask`, where `mask` is `T::splat(!T::Elem::default())`.
///
/// # Safety
///
/// Forwards to the `simd_xor` intrinsic; the caller must ensure `T` is a
/// type that intrinsic accepts.
#[inline(always)]
pub unsafe fn simd_not<T: Vector>(x: T) -> T
where
    T::Elem: Default + Not<Output = T::Elem>,
{
    // Complement of the default element, repeated in every lane.
    let flipped_default = Not::not(<T::Elem as Default>::default());
    let mask = T::splat(flipped_default);
    simd_xor(x, mask)
}
/// Lane-wise `x & !y`.
///
/// Note the operand order: it is the *second* argument that is complemented
/// (via [`simd_not`]) before the AND.
///
/// # Safety
///
/// Same requirements as [`simd_not`] and the `simd_and` intrinsic.
#[inline(always)]
pub unsafe fn simd_andnot<T: Vector>(x: T, y: T) -> T
where
    T::Elem: Default + Not<Output = T::Elem>,
{
    let y_complement = simd_not(y);
    simd_and(x, y_complement)
}
/// Lane-wise rounding average: `(x + y + 1) >> 1`, evaluated in vector type
/// `U` and cast back to `T`.
///
/// `U` cannot be inferred from the arguments, so callers have to name it.
/// NOTE(review): presumably `U` has wider lanes than `T` so the intermediate
/// sum cannot overflow — confirm at the call sites.
///
/// # Safety
///
/// Forwards to the `simd_cast`, `simd_add` and `simd_shr` intrinsics; `T` and
/// `U` must be types those intrinsics accept for the casts in both directions.
#[inline(always)]
pub unsafe fn simd_avg<T: Vector, U: Vector>(x: T, y: T) -> T
where
    U::Elem: From<u8>,
{
    // The same splat serves as the rounding bias and as the shift amount.
    let unit: U = U::splat(<U::Elem as From<u8>>::from(1u8));

    let wide_x: U = simd_cast(x);
    let wide_y: U = simd_cast(y);
    let total: U = simd_add(wide_x, wide_y);

    let biased: U = simd_add(total, unit);
    let halved: U = simd_shr(biased, unit);
    simd_cast(halved)
}
/// Lane-wise maximum: picks the lane from `x` where `x > y`, otherwise the
/// lane from `y`.
///
/// The comparison mask is produced with the same vector type `T` as the
/// operands.
///
/// # Safety
///
/// Forwards to the `simd_gt` and `simd_select` intrinsics; `T` must be usable
/// both as their operand type and as their mask type.
#[inline(always)]
pub unsafe fn simd_max<T: Vector>(x: T, y: T) -> T {
    let x_is_greater: T = simd_gt(x, y);
    simd_select(x_is_greater, x, y)
}
/// Lane-wise minimum: picks the lane from `y` where `x > y`, otherwise the
/// lane from `x`.
///
/// The comparison mask is produced with the same vector type `T` as the
/// operands.
///
/// # Safety
///
/// Forwards to the `simd_gt` and `simd_select` intrinsics; `T` must be usable
/// both as their operand type and as their mask type.
#[inline(always)]
pub unsafe fn simd_min<T: Vector>(x: T, y: T) -> T {
    let x_is_greater: T = simd_gt(x, y);
    simd_select(x_is_greater, y, x)
}
/// Shifts every lane of `x` left by the compile-time amount `BITS`.
///
/// When `BITS` is at least the lane width in bits (`size_of::<T::Elem>() * 8`),
/// the result is `T::splat(0)` instead of calling the shift intrinsic.
///
/// # Safety
///
/// Forwards to the `simd_shl` intrinsic; the caller must ensure `T` is a
/// type that intrinsic accepts.
#[inline(always)]
pub unsafe fn simd_shl_all<T: Vector, const BITS: u8>(x: T) -> T
where
    T::Elem: From<u8>,
{
    let lane_bits = (size_of::<T::Elem>() as u8) * 8;
    if BITS < lane_bits {
        let amount = T::splat(<T::Elem as From<u8>>::from(BITS));
        simd_shl(x, amount)
    } else {
        // Shift count covers the whole lane: short-circuit to a zero splat.
        T::splat(<T::Elem as From<u8>>::from(0u8))
    }
}
/// Shifts every lane of `x` right by the compile-time amount `BITS`.
///
/// When `BITS` is at least the lane width in bits (`size_of::<T::Elem>() * 8`),
/// the shift intrinsic is not called; the result is the lane-wise comparison
/// `x < 0` as produced by `simd_lt`.
/// NOTE(review): for signed lanes this looks like the saturated result of an
/// arithmetic shift (sign replicated across the lane) — confirm the intended
/// behaviour for unsigned lane types.
///
/// # Safety
///
/// Forwards to the `simd_shr` and `simd_lt` intrinsics; `T` must be usable as
/// their operand type and as the result type of the comparison.
#[inline(always)]
pub unsafe fn simd_shr_all<T: Vector, const BITS: u8>(x: T) -> T
where
    T::Elem: From<u8>,
{
    let lane_bits = (size_of::<T::Elem>() as u8) * 8;
    if BITS < lane_bits {
        let amount = T::splat(<T::Elem as From<u8>>::from(BITS));
        return simd_shr(x, amount);
    }

    // Shift count covers the whole lane: fall back to the "is negative" mask.
    let zero = T::splat(<T::Elem as From<u8>>::from(0u8));
    simd_lt(x, zero)
}
/// Lane-wise absolute value: lanes that compare less than
/// `T::Elem::default()` are replaced by their negation, all other lanes are
/// passed through unchanged.
///
/// # Safety
///
/// Forwards to the `simd_lt`, `simd_neg` and `simd_select` intrinsics; `T`
/// must be usable both as their operand type and as the mask type.
#[inline(always)]
pub unsafe fn simd_abs<T: Vector>(x: T) -> T
where
    T::Elem: Default,
{
    let baseline = T::splat(<T::Elem as Default>::default());
    let is_negative: T = simd_lt(x, baseline);
    let negated = simd_neg(x);
    simd_select(is_negative, negated, x)
}
/// Converts `x: T` into an `R` in two steps:
///
/// 1. shuffle `x` together with `T::splat(T::Elem::default())` using the
///    indices `0, 1, .., R::LEN - 1` (from [`simd_slice_indices`]), giving an
///    intermediate vector of type `U` (same lane type as `T`);
/// 2. cast that intermediate vector lane-wise to `R`.
///
/// NOTE(review): the bound here is spelled `[u32; R::LEN]: Sized` while
/// `simd_slice_indices` is bounded on `[i32; R::LEN]: Sized` — confirm the
/// difference is intentional.
///
/// # Safety
///
/// Forwards to the `simd_shuffle` and `simd_cast` intrinsics; `T`, `U` and `R`
/// must be types those intrinsics accept for this combination of lane counts.
#[inline(always)]
pub unsafe fn simd_expand<T: Vector, U: Vector, R: Vector>(x: T) -> R
where
    T::Elem: Default,
    U: Vector<Elem = T::Elem>,
    R::Elem: From<T::Elem>,
    [u32; R::LEN]: Sized,
{
    // Second shuffle operand: supplies the lanes selected by indices past `x`.
    let padding = T::splat(<T::Elem as Default>::default());
    let gathered: U = simd_shuffle(x, padding, const { simd_slice_indices::<R>(0) });
    simd_cast(gathered)
}
/// Builds an index table of length `R::LEN` that starts out as the identity
/// (`table[i] == i`) and then has the slots `start .. start + T::LEN`
/// overwritten with `R::LEN, R::LEN + 1, ..`.
///
/// Overwriting stops early at the end of the table, so a `start` at or past
/// `R::LEN` leaves the identity mapping untouched.
pub const fn simd_insert_indices<T, R>(start: usize) -> [i32; R::LEN]
where
    T: Vector,
    R: Vector,
    [i32; R::LEN]: Sized,
{
    let mut table = [0i32; R::LEN];

    // Pass 1: identity mapping.
    let mut lane = 0;
    while lane < R::LEN {
        table[lane] = lane as i32;
        lane += 1;
    }

    // Pass 2: redirect the window beginning at `start` to indices >= R::LEN.
    let mut offset = 0;
    while offset < T::LEN {
        let slot = start + offset;
        if slot >= R::LEN {
            break;
        }
        table[slot] = (offset + R::LEN) as i32;
        offset += 1;
    }

    table
}
/// Builds the index table `start, start + 1, .., start + T::LEN - 1`.
pub const fn simd_slice_indices<T: Vector>(start: usize) -> [i32; T::LEN]
where
    [i32; T::LEN]: Sized,
{
    let mut table = [0i32; T::LEN];
    let mut lane = 0;
    loop {
        if lane >= T::LEN {
            break;
        }
        table[lane] = (start + lane) as i32;
        lane += 1;
    }
    table
}
/// Builds an index table of length `T::LEN` made of consecutive pairs
/// `(start + k, start + k + T::LEN)` for `k = 0, 1, ..`: even slots hold
/// `start + k`, odd slots hold the same value offset by `T::LEN`.
pub const fn simd_unpack_indices<T: Vector>(start: usize) -> [i32; T::LEN]
where
    [i32; T::LEN]: Sized,
{
    let mut table = [0i32; T::LEN];
    let mut pair = 0;
    while pair * 2 < T::LEN {
        let even_slot = 2 * pair;
        let low = start + pair;
        table[even_slot] = low as i32;
        table[even_slot + 1] = (low + T::LEN) as i32;
        pair += 1;
    }
    table
}
/// Builds an index table of length `R::LEN` where slot `i` holds
/// `p % T::LEN + (p / T::LEN) * R::LEN` with `p = start + i`.
///
/// In other words each position `p` is split into an offset within a
/// `T::LEN`-sized group and a group number, and the group number is scaled by
/// `R::LEN`.
pub const fn simd_subslice_indices<T, R>(start: usize) -> [i32; R::LEN]
where
    T: Vector,
    R: Vector,
    [i32; R::LEN]: Sized,
{
    let mut table = [0i32; R::LEN];
    let mut lane = 0;
    while lane < R::LEN {
        let position = start + lane;
        let within_group = position % T::LEN;
        let group = position / T::LEN;
        table[lane] = (within_group + group * R::LEN) as i32;
        lane += 1;
    }
    table
}