use core::arch::aarch64::{
uint8x16_t, vceqq_u8, vget_lane_u64, vld1q_u8, vreinterpret_u64_u8, vreinterpretq_u16_u8,
vshrn_n_u16,
};
/// Sixteen bytes of storage forced to 16-byte alignment.
///
/// `Vector::load_unaligned_remainder` uses it as a zero-initialised staging
/// buffer for a short tail before loading it into a vector.
#[repr(align(16))]
struct Align16([u8; 16]);
/// A 128-bit NEON register treated as sixteen `u8` lanes.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Vector(uint8x16_t);

impl Vector {
    /// Number of bytes (lanes) in one vector.
    pub const SIZE: usize = 16;

    /// Returns a vector with every lane set to `v`.
    #[inline]
    pub const fn fill(v: u8) -> Self {
        // SAFETY: `[u8; 16]` and `uint8x16_t` are both 16 bytes wide and every
        // bit pattern is a valid `uint8x16_t`.
        Self(unsafe { core::mem::transmute::<[u8; 16], uint8x16_t>([v; Self::SIZE]) })
    }

    /// Loads the 16 bytes starting at `data[offset]`; no alignment is required.
    ///
    /// # Panics
    ///
    /// Panics if fewer than 16 bytes are available at `offset`.
    #[inline(always)]
    pub fn load_unaligned(data: &[u8], offset: usize) -> Self {
        // The bounds check runs in every build profile: this is a safe
        // function, so an out-of-range `offset` must panic rather than let the
        // load below read past the end of `data`.
        let chunk = &data[offset..][..Self::SIZE];
        // SAFETY: `chunk` is exactly `Self::SIZE` readable bytes, which is all
        // that `vld1q_u8` reads.
        Self(unsafe { vld1q_u8(chunk.as_ptr()) })
    }

    /// Loads the 16 bytes starting at `data[offset]`, which the caller expects
    /// to be 16-byte aligned.
    ///
    /// The alignment expectation is only verified in debug builds; the load
    /// instruction used is the same one as in [`Vector::load_unaligned`].
    ///
    /// # Panics
    ///
    /// Panics if fewer than 16 bytes are available at `offset`. In debug
    /// builds it also panics if the address is not a multiple of 16.
    #[inline(always)]
    pub fn load_aligned(data: &[u8], offset: usize) -> Self {
        // Bounds are checked unconditionally so the load can never read
        // outside `data`, even in release builds.
        let chunk = &data[offset..][..Self::SIZE];
        debug_assert!(chunk.as_ptr().addr().is_multiple_of(16));
        // SAFETY: `chunk` is exactly `Self::SIZE` readable bytes, which is all
        // that `vld1q_u8` reads.
        Self(unsafe { vld1q_u8(chunk.as_ptr()) })
    }

    /// Loads the bytes of `data` from `offset` to the end, zero-padding the
    /// missing high lanes.
    ///
    /// # Panics
    ///
    /// Panics if `offset > data.len()` or if more than 16 bytes remain.
    #[inline(always)]
    pub fn load_unaligned_remainder(data: &[u8], offset: usize) -> Self {
        let tail = &data[offset..];
        // Stage the short tail in a zeroed 16-byte buffer so the full-width
        // load below never touches memory outside `data`.
        let mut buf = Align16([0; Self::SIZE]);
        buf.0[..tail.len()].copy_from_slice(tail);
        // SAFETY: `buf.0` is a live, fully initialised 16-byte array.
        Self(unsafe { vld1q_u8(buf.0.as_ptr()) })
    }

    /// Compares every lane with `byte`.
    ///
    /// Lanes equal to `byte` become `0xFF`; all other lanes become `0x00`.
    #[inline(always)]
    pub fn eq(self, byte: u8) -> Self {
        let needle = Self::fill(byte);
        // SAFETY: register-only intrinsic, no memory is accessed. Assumes the
        // build target has NEON enabled.
        Self(unsafe { vceqq_u8(self.0, needle.0) })
    }

    /// Packs the vector into a 64-bit [`Mask`] carrying four bits per lane.
    ///
    /// The sixteen bytes are viewed as eight `u16` values, each shifted right
    /// by four and narrowed to a byte. For a comparison result (lanes of
    /// `0xFF` / `0x00`) every lane therefore contributes one nibble that is
    /// either all ones or all zeros.
    #[inline(always)]
    pub fn movemask(self) -> Mask {
        // SAFETY: register-only intrinsics, no memory is accessed. Assumes the
        // build target has NEON enabled.
        let bits = unsafe {
            let pairs = vreinterpretq_u16_u8(self.0);
            let nibbles = vshrn_n_u16(pairs, 4);
            vget_lane_u64(vreinterpret_u64_u8(nibbles), 0)
        };
        Mask(bits)
    }
}
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Mask(u64);
impl Mask {
#[inline(always)]
pub fn has_match(&self) -> bool {
self.0 != 0
}
#[inline(always)]
pub fn first_match(&self) -> Match {
Match(self.0.trailing_zeros() as usize)
}
#[inline(always)]
pub fn clear_to(&mut self, m: Match) {
self.0 &= !(0xffff_ffff_ffff_ffff >> (63 - (m.0 + 3)));
}
}
/// A bit position inside a `Mask`.
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Match(usize);

impl Match {
    /// Converts the bit position into an index by dividing it by four,
    /// rounding down.
    #[inline(always)]
    pub fn as_index(self) -> usize {
        let Self(bit) = self;
        bit / 4
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Clearing up to the first match removes the four-bit group that starts
    /// at that match (and anything below it) while leaving higher bits intact.
    #[test]
    fn test_clear_to() {
        let mut bits = Mask(0b00000000_11110000_11111111_00000000);
        let first = bits.first_match();
        bits.clear_to(first);
        assert_eq!(bits.0, 0b00000000_11110000_11110000_00000000);
    }
}