use core::mem;
/// Compile-time size information for a type.
///
/// This trait shares its name with its supertrait [`core::marker::Sized`], so
/// a bare `Sized` written after this definition refers to this trait rather
/// than to the marker trait from the prelude.
pub trait Sized: core::marker::Sized {
    /// Size of `Self` in bytes, as reported by [`mem::size_of`].
    const SIZE: usize = mem::size_of::<Self>();

    /// Size of `Self` in bits (eight bits per byte of [`Self::SIZE`]).
    const BITS: usize = 8 * Self::SIZE;
}
// Blanket implementation: every sized type picks up `SIZE` and `BITS` through
// the trait's default associated constants, so no per-type impl is required.
impl<T> Sized for T where T: core::marker::Sized {}
/// Non-zero counterpart of [`Packed`]: `NonZeroU32` on targets with 8- or
/// 16-bit pointers, `NonZeroUsize` everywhere else.
pub type NonZeroPacked = _NonZeroPacked;

/// Integer word that holds the packed lanes: `u32` on targets with 8- or
/// 16-bit pointers, `usize` everywhere else.
pub type Packed = _Packed;

// Pick the backing integer types for the public aliases above.
cfg_if::cfg_if! {
    if #[cfg(any(target_pointer_width = "8", target_pointer_width = "16"))] {
        // Narrow-pointer targets: `usize` would be narrower than 32 bits here,
        // so a fixed 32-bit word is used instead (presumably so the 32-bit
        // lane types still fit into one word).
        type _NonZeroPacked = core::num::NonZeroU32;
        type _Packed = u32;
    } else {
        // Everything else: use the pointer-sized integer.
        type _NonZeroPacked = core::num::NonZeroUsize;
        type _Packed = usize;
    }
}
pub trait Pack: Sized + Copy + Eq + 'static {
const LANES: usize = Packed::SIZE / Self::SIZE;
const ALIGN: usize = Self::LANES - 1;
const LO: Packed;
const HI: Packed;
fn broadcast(self) -> Packed;
}
macro_rules! impl_pack {
($ity:ty, $uty:ty) => {
impl Pack for $ity {
const LO: Packed = <$uty as Pack>::LO;
const HI: Packed = <$uty as Pack>::HI;
#[inline(always)]
fn broadcast(self) -> Packed {
<$uty as Pack>::broadcast(self as $uty)
}
}
impl Pack for $uty {
const LO: Packed = Packed::MAX / (<$uty>::MAX as Packed);
const HI: Packed = <$uty as Pack>::LO << (<$uty as Sized>::BITS - 1);
#[inline(always)]
fn broadcast(self) -> Packed {
(self as Packed) * <$uty as Pack>::LO
}
}
};
}
impl_pack!(i16, u16);
impl_pack!(i32, u32);
/// Compares the lanes of `a` and `b` and returns a per-lane equality mask.
///
/// `T` selects the lane width. In the result, the highest bit of a lane is set
/// if and only if that lane holds the same value in `a` and in `b`; every
/// other bit is zero. The result is therefore non-zero exactly when at least
/// one lane matches.
///
/// This assumes `T::HI` has exactly the top bit of every lane set, as the
/// implementations generated by `impl_pack!` provide.
#[inline(always)]
pub fn simd_eq<T: Pack>(a: Packed, b: Packed) -> Packed {
    let hi = <T as Pack>::HI;
    // All bits of every lane except its top bit.
    let low_bits = !hi;
    // Equal lanes become zero.
    let diff = a ^ b;

    // Why not the shorter `(diff - LO) & !diff & HI`: that subtraction borrows
    // across lane boundaries, so a lane whose difference is exactly 1 and that
    // sits directly above a matching lane would be reported as equal too.
    //
    // Adding `low_bits` to the low bits of each lane sets the lane's top bit
    // iff any low bit was set. The per-lane sum is at most `2 * low_bits`,
    // which still fits in the lane, so nothing carries into a neighbour.
    let any_low_bit = (diff & low_bits).wrapping_add(low_bits);

    // OR-ing `diff` back in accounts for the lane's own top bit; a lane whose
    // top bit is still clear afterwards was entirely zero, i.e. equal.
    !(any_low_bit | diff) & hi
}