use core::arch::x86_64::{
__m128i, _mm_cmpeq_epi8, _mm_load_si128, _mm_loadu_si128, _mm_movemask_epi8,
};
/// Sixteen bytes of storage forced to a 16-byte alignment.
///
/// `Vector::load_unaligned_remainder` uses it as a zero-initialised staging
/// buffer so that a short tail of input can be read with an aligned load.
#[repr(align(16))]
struct Align16([u8; 16]);
/// A 128-bit SSE2 register (`__m128i`) treated as sixteen independent byte lanes.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Vector(__m128i);

impl Vector {
    /// Number of bytes held by one vector.
    pub const SIZE: usize = 16;

    /// Returns a vector with every byte lane set to `v`.
    ///
    /// Implemented with a transmute so the function can stay `const`.
    #[inline]
    pub const fn fill(v: u8) -> Self {
        // SAFETY: `[u8; 16]` and `__m128i` are both exactly 16 bytes and every
        // bit pattern is a valid `__m128i`.
        Self(unsafe { core::mem::transmute::<[u8; 16], __m128i>([v; 16]) })
    }

    /// Loads the 16 bytes starting at `data[offset]`; no alignment is required.
    ///
    /// # Panics
    ///
    /// Panics if fewer than [`Self::SIZE`] bytes are available at `offset`.
    #[inline(always)]
    pub fn load_unaligned(data: &[u8], offset: usize) -> Self {
        // Checked in every build profile: this is a safe function, so an
        // out-of-range `offset` must panic rather than read out of bounds.
        let chunk = &data[offset..][..Self::SIZE];
        // SAFETY: `chunk` is exactly 16 readable bytes and `_mm_loadu_si128`
        // has no alignment requirement.
        Self(unsafe { _mm_loadu_si128(chunk.as_ptr().cast::<__m128i>()) })
    }

    /// Loads the 16 bytes starting at `data[offset]` with an aligned load.
    ///
    /// # Panics
    ///
    /// Panics if fewer than [`Self::SIZE`] bytes are available at `offset`, or
    /// if the address of `data[offset]` is not a multiple of 16.
    #[inline(always)]
    pub fn load_aligned(data: &[u8], offset: usize) -> Self {
        let chunk = &data[offset..][..Self::SIZE];
        let ptr = chunk.as_ptr();
        // `_mm_load_si128` requires a 16-byte aligned address; enforce it in
        // all build profiles so safe callers cannot trigger a misaligned load.
        assert!(
            ptr.addr().is_multiple_of(Self::SIZE),
            "Vector::load_aligned requires a 16-byte aligned address"
        );
        // SAFETY: `chunk` is exactly 16 readable bytes and `ptr` was just
        // checked to be 16-byte aligned.
        Self(unsafe { _mm_load_si128(ptr.cast::<__m128i>()) })
    }

    /// Loads the final `data.len() - offset` bytes (at most 16) of `data`,
    /// padding the remaining high lanes with zero.
    ///
    /// # Panics
    ///
    /// Panics if `offset > data.len()` or if more than [`Self::SIZE`] bytes
    /// remain after `offset`.
    #[inline(always)]
    pub fn load_unaligned_remainder(data: &[u8], offset: usize) -> Self {
        let tail = &data[offset..];
        let mut buf = Align16([0; Self::SIZE]);
        // Copy the tail into the front of the zeroed buffer; the slice index
        // panics if the tail does not fit.
        buf.0[..tail.len()].copy_from_slice(tail);
        // SAFETY: `buf` is a live local of 16 bytes whose type is declared
        // `#[repr(align(16))]`, so the aligned load is in bounds and aligned.
        Self(unsafe { _mm_load_si128(buf.0.as_ptr().cast::<__m128i>()) })
    }

    /// Compares every lane with `byte`: lanes that are equal become `0xFF`,
    /// all other lanes become `0x00`.
    #[inline(always)]
    pub fn eq(self, byte: u8) -> Self {
        // SAFETY: the intrinsic operates on register values only and needs
        // nothing beyond SSE2. This assumes the build target enables SSE2 (the
        // default on mainstream x86_64 targets) — TODO confirm any cfg gating
        // outside this view.
        Self(unsafe { _mm_cmpeq_epi8(self.0, Self::fill(byte).0) })
    }

    /// Collects the most significant bit of each byte lane into a [`Mask`];
    /// bit `i` of the mask comes from lane `i`.
    #[inline(always)]
    pub fn movemask(self) -> Mask {
        // SAFETY: register-only SSE2 intrinsic; same target assumption as `eq`.
        let bits = unsafe { _mm_movemask_epi8(self.0) };
        // The intrinsic returns `i32`; reinterpret the bits as unsigned.
        Mask(bits.cast_unsigned())
    }
}
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Mask(u32);
impl Mask {
#[inline(always)]
pub fn has_match(&self) -> bool {
self.0 != 0
}
#[inline(always)]
pub fn first_match(&self) -> Match {
Match(self.0.trailing_zeros() as usize)
}
#[inline(always)]
pub fn clear_to(&mut self, m: Match) {
self.0 &= !(0xffff_ffff >> (31 - m.0));
}
}
/// A position wrapped in a newtype.
///
/// Within this file it is produced by `Mask::first_match` and consumed by
/// `Mask::clear_to`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct Match(usize);

impl Match {
    /// Returns the wrapped position as a plain `usize`.
    #[inline(always)]
    pub fn as_index(&self) -> usize {
        let Self(index) = *self;
        index
    }
}