use std::arch::aarch64::*;
/// Computes the running (prefix) XOR of the bits in `bitmask`.
///
/// Bit `i` of the result is the XOR of bits `0..=i` of the input, i.e. the
/// parity of the number of set bits at or below position `i`.
///
/// # Safety
///
/// The body performs only integer shifts and XORs, so no precondition is
/// visible here; the function is kept `unsafe` so existing callers compile
/// unchanged.
pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
    let mut acc = bitmask;
    // Doubling the distance each round (1, 2, 4, 8, 16, 32) folds every lower
    // bit into each position after six steps.
    let mut distance = 1u32;
    while distance < 64 {
        acc ^= acc << distance;
        distance <<= 1;
    }
    acc
}
/// Classifies the 64 bytes of `data` as whitespace / non-whitespace and
/// returns the result as a 64-bit mask.
///
/// Each 16-byte quarter of `data` is classified with a pair of nibble lookup
/// tables; the four lane masks are handed to `sonic_simd::neon::to_bitmask64`
/// and the resulting value is bitwise inverted, so the whitespace bytes
/// (space, `\t`, `\n`, `\r`) end up as the cleared positions.
/// NOTE(review): presumably `to_bitmask64` yields one bit per input byte in
/// byte order; confirm against the `sonic_simd` implementation.
///
/// # Safety
///
/// Uses AArch64 NEON intrinsics, so the executing CPU must support them.
/// The four 16-byte loads start at offsets 0, 16, 32 and 48 and therefore
/// stay within the 64-byte array.
#[inline(always)]
pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 {
    /// Returns a vector whose lanes are `0xFF` where the corresponding input
    /// byte is 0x20, 0x09, 0x0A or 0x0D, and `0x00` everywhere else.
    #[inline(always)]
    unsafe fn whitespace_lanes(bytes: uint8x16_t) -> uint8x16_t {
        // Classification bits indexed by the low nibble of a byte.
        const LOW_NIBBLE_CLASS: uint8x16_t = unsafe {
            std::mem::transmute::<[u8; 16], uint8x16_t>([
                16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
            ])
        };
        // Classification bits indexed by the high nibble of a byte.
        const HIGH_NIBBLE_CLASS: uint8x16_t = unsafe {
            std::mem::transmute::<[u8; 16], uint8x16_t>([
                8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
            ])
        };

        let low_nibbles = vandq_u8(bytes, vmovq_n_u8(0xf));
        let high_nibbles = vshrq_n_u8(bytes, 4);

        // A class bit survives only when both nibble lookups agree on it.
        let classes = vandq_u8(
            vqtbl1q_u8(LOW_NIBBLE_CLASS, low_nibbles),
            vqtbl1q_u8(HIGH_NIBBLE_CLASS, high_nibbles),
        );

        // Bits 0x08 and 0x10 are the ones tested; a lane becomes all-ones
        // when either is present.
        vtstq_u8(classes, vmovq_n_u8(0x18))
    }

    let base = data.as_ptr();
    let quarter0 = whitespace_lanes(vld1q_u8(base));
    let quarter1 = whitespace_lanes(vld1q_u8(base.add(16)));
    let quarter2 = whitespace_lanes(vld1q_u8(base.add(32)));
    let quarter3 = whitespace_lanes(vld1q_u8(base.add(48)));

    let whitespace_bits = sonic_simd::neon::to_bitmask64(quarter0, quarter1, quarter2, quarter3);
    // Invert so that the non-whitespace positions are the ones reported.
    !whitespace_bits
}