/// Stamps out the chunk-scanning functions `scan_chunk` and
/// `scan_and_prefetch` at the invocation site.
///
/// Invocation shape: `impl_scanner!(#[attr]* mod name);`. Every `#[attr]` is
/// copied onto both generated functions. The `name` identifier is required by
/// the matcher but is not used anywhere in the expansion.
///
/// The expansion refers to `LANES`, `simd_splat`, `simd_load`, `simd_any_ge`
/// and `simd_cmpge_mask` by bare name, so those must be in scope where the
/// macro is invoked; `crate::simd::prefetch` must provide
/// `prefetch_l1_stream` and `prefetch_l2_stream`.
macro_rules! impl_scanner {
    ($(#[$attr:meta])* mod $name:ident) => {
        /// Builds a 64-bit mask for the chunk starting at `ptr`.
        ///
        /// Loads `64 / LANES` vectors at offsets `0, LANES, 2 * LANES, ...`.
        /// For each vector where `simd_any_ge` reports a hit against the
        /// splatted `bound`, the result of `simd_cmpge_mask` is OR-ed into the
        /// returned value, shifted left by that vector's byte offset.
        /// Presumably bit `k` therefore means "byte `k` is `>= bound`" --
        /// confirm against the helper definitions.
        ///
        /// # Safety
        ///
        /// `ptr` must be valid for every `simd_load` performed here, i.e. at
        /// each offset `i * LANES` for `i` in `0..64 / LANES` (presumably 64
        /// readable bytes in total -- confirm against `simd_load`). Any
        /// requirement introduced by the forwarded attributes (for example a
        /// CPU feature) must also hold.
        $(#[$attr])*
        #[inline]
        pub(crate) unsafe fn scan_chunk(ptr: *const u8, bound: u8) -> u64 {
            const CHUNK_BYTES: usize = 64;
            const VECTORS: usize = CHUNK_BYTES / LANES;

            // SAFETY: the caller upholds this function's contract, which is
            // assumed to cover the requirements of the SIMD helpers.
            let threshold = unsafe { simd_splat(bound) };
            let mut bits: u64 = 0;

            for index in 0..VECTORS {
                let offset = index * LANES;
                // SAFETY: the caller guarantees `ptr` is readable at every
                // vector offset inside the chunk.
                let lanes = unsafe { simd_load(ptr.add(offset)) };
                // SAFETY: see the contract note on `threshold` above.
                let any_hit = unsafe { simd_any_ge(lanes, threshold) };
                if any_hit {
                    // NOTE(review): if `simd_cmpge_mask` returns a signed
                    // integer, `as u64` sign-extends and would set bits above
                    // this vector's slot -- confirm it returns an unsigned
                    // mask.
                    // SAFETY: see the contract note on `threshold` above.
                    let hits = unsafe { simd_cmpge_mask(lanes, threshold) } as u64;
                    bits |= hits << offset;
                }
            }

            bits
        }

        /// Calls `prefetch_l1_stream(prefetch_l1)` and
        /// `prefetch_l2_stream(prefetch_l2)`, then returns
        /// `scan_chunk(ptr, bound)`.
        ///
        /// # Safety
        ///
        /// `ptr` and `bound` must satisfy the contract of `scan_chunk`. The
        /// two prefetch pointers must satisfy whatever the prefetch helpers
        /// in `crate::simd::prefetch` require (not visible from here).
        // Presumably not every instantiation calls this variant -- confirm.
        #[allow(dead_code)]
        $(#[$attr])*
        #[inline(never)]
        pub(crate) unsafe fn scan_and_prefetch(
            ptr: *const u8,
            prefetch_l1: *const u8,
            prefetch_l2: *const u8,
            bound: u8,
        ) -> u64 {
            use crate::simd::prefetch;

            // SAFETY: the caller guarantees both pointers are acceptable to
            // the prefetch helpers.
            unsafe { prefetch::prefetch_l1_stream(prefetch_l1) };
            unsafe { prefetch::prefetch_l2_stream(prefetch_l2) };

            // SAFETY: `ptr` and `bound` are forwarded unchanged, so the
            // caller's guarantees carry over to `scan_chunk`.
            unsafe { scan_chunk(ptr, bound) }
        }
    };
}
pub(crate) use impl_scanner;