// Always compiled, always crate-private.
pub(crate) mod prefetch;
pub(crate) mod scanner;
// Backend modules. Each backend is declared twice under mutually exclusive
// cfgs: crate-private in normal builds, and `pub` + `#[doc(hidden)]` when
// building tests or when the `internal-test-api` feature is enabled
// (presumably so code outside this crate can reach the kernels directly —
// confirm against the feature's users).
#[cfg(not(any(test, feature = "internal-test-api")))]
pub(crate) mod scalar;
#[cfg(any(test, feature = "internal-test-api"))]
#[doc(hidden)]
pub mod scalar;
// x86_64-only backend module (provides `sse42`, `avx2` and `avx512` submodules
// referenced by the vtables below).
#[cfg(all(target_arch = "x86_64", not(any(test, feature = "internal-test-api"))))]
pub(crate) mod x86_64;
#[cfg(all(target_arch = "x86_64", any(test, feature = "internal-test-api")))]
#[doc(hidden)]
pub mod x86_64;
// aarch64-only backend module (provides `neon` and `sve2` submodules).
#[cfg(all(target_arch = "aarch64", not(any(test, feature = "internal-test-api"))))]
pub(crate) mod aarch64;
#[cfg(all(target_arch = "aarch64", any(test, feature = "internal-test-api")))]
#[doc(hidden)]
pub mod aarch64;
// wasm32-only backend module (provides a `simd128` submodule when that target
// feature is enabled).
#[cfg(all(target_arch = "wasm32", not(any(test, feature = "internal-test-api"))))]
pub(crate) mod wasm32;
#[cfg(all(target_arch = "wasm32", any(test, feature = "internal-test-api")))]
#[doc(hidden)]
pub mod wasm32;
/// Table of kernel function pointers for a single backend.
///
/// One `static` instance exists per backend; on targets that dispatch through
/// a table, `dispatch::get_vtable()` hands out a reference to the chosen one.
// dead_code is allowed because some configurations never read the fields:
// the aarch64 entry points in this file call the kernels directly instead of
// going through a table.
#[allow(dead_code)]
pub(crate) struct SimdVTable {
/// Kernel invoked by the module-level `scan_chunk` wrapper as
/// `(ptr, bound) -> mask`.
pub scan_chunk: unsafe fn(*const u8, u8) -> u64,
/// Kernel invoked by the module-level `scan_and_prefetch` wrapper as
/// `(ptr, prefetch_l1, prefetch_l2, bound) -> mask`.
pub scan_and_prefetch: unsafe fn(*const u8, *const u8, *const u8, u8) -> u64,
}
/// Table wired to the `scalar` module's kernels; the fallback returned by the
/// `dispatch` modules when no SIMD backend is selected.
// dead_code is allowed because configurations that always pick another
// backend (or, on aarch64, bypass the tables entirely) never reference it.
#[allow(dead_code)]
static VTABLE_SCALAR: SimdVTable = SimdVTable {
scan_chunk: scalar::scan_chunk,
scan_and_prefetch: scalar::scan_and_prefetch,
};
/// Table wired to the `x86_64::sse42` kernels.
// dead_code is allowed because the non-`std` dispatcher references only the
// single table matching the compile-time target features.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
static VTABLE_SSE42: SimdVTable = SimdVTable {
scan_chunk: x86_64::sse42::scan_chunk,
scan_and_prefetch: x86_64::sse42::scan_and_prefetch,
};
/// Table wired to the `x86_64::avx2` kernels.
// dead_code is allowed because the non-`std` dispatcher references only the
// single table matching the compile-time target features.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
static VTABLE_AVX2: SimdVTable = SimdVTable {
scan_chunk: x86_64::avx2::scan_chunk,
scan_and_prefetch: x86_64::avx2::scan_and_prefetch,
};
/// Table wired to the `x86_64::avx512` kernels; selected by the dispatchers
/// when the `avx512bw` feature is available.
// dead_code is allowed because the non-`std` dispatcher references only the
// single table matching the compile-time target features.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
static VTABLE_AVX512: SimdVTable = SimdVTable {
scan_chunk: x86_64::avx512::scan_chunk,
scan_and_prefetch: x86_64::avx512::scan_and_prefetch,
};
/// Table wired to the `aarch64::neon` kernels.
// dead_code is allowed because nothing visible in this file reads this table:
// the aarch64 entry points call the kernels directly. NOTE(review): it may be
// referenced from tests or other modules — confirm before removing.
#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
static VTABLE_NEON: SimdVTable = SimdVTable {
scan_chunk: aarch64::neon::scan_chunk,
scan_and_prefetch: aarch64::neon::scan_and_prefetch,
};
/// Table wired to the `aarch64::sve2` kernels.
// dead_code is allowed because nothing visible in this file reads this table:
// the aarch64 entry points call the kernels directly. NOTE(review): it may be
// referenced from tests or other modules — confirm before removing.
#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
static VTABLE_SVE2: SimdVTable = SimdVTable {
scan_chunk: aarch64::sve2::scan_chunk,
scan_and_prefetch: aarch64::sve2::scan_and_prefetch,
};
/// Table wired to the `wasm32::simd128` kernels.
///
/// Only exists when the `simd128` target feature is enabled at compile time,
/// in which case the wasm32 dispatcher always returns it.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
static VTABLE_SIMD128: SimdVTable = SimdVTable {
scan_chunk: wasm32::simd128::scan_chunk,
scan_and_prefetch: wasm32::simd128::scan_and_prefetch,
};
/// Runtime backend selection for x86_64 when `std` is available.
#[cfg(all(feature = "std", target_arch = "x86_64"))]
mod dispatch {
    use super::SimdVTable;
    use std::sync::OnceLock;

    /// Backend table chosen on first use and reused for every later call.
    static VTABLE: OnceLock<&'static SimdVTable> = OnceLock::new();

    /// Probes the running CPU and picks the first available backend in the
    /// order AVX-512 (`avx512bw`), AVX2, SSE4.2, scalar.
    fn detect_best() -> &'static SimdVTable {
        if std::is_x86_feature_detected!("avx512bw") {
            &super::VTABLE_AVX512
        } else if std::is_x86_feature_detected!("avx2") {
            &super::VTABLE_AVX2
        } else if std::is_x86_feature_detected!("sse4.2") {
            &super::VTABLE_SSE42
        } else {
            &super::VTABLE_SCALAR
        }
    }

    /// Returns the cached backend table, running detection on the first call.
    #[inline]
    pub(crate) fn get_vtable() -> &'static SimdVTable {
        *VTABLE.get_or_init(detect_best)
    }
}
#[cfg(all(not(feature = "std"), target_arch = "x86_64"))]
mod dispatch {
use super::SimdVTable;
#[inline]
pub(crate) fn get_vtable() -> &'static SimdVTable {
#[cfg(target_feature = "avx512bw")]
{
return &super::VTABLE_AVX512;
}
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512bw")))]
{
return &super::VTABLE_AVX2;
}
#[cfg(all(
target_feature = "sse4.2",
not(target_feature = "avx2"),
not(target_feature = "avx512bw")
))]
{
return &super::VTABLE_SSE42;
}
#[cfg(not(any(
target_feature = "sse4.2",
target_feature = "avx2",
target_feature = "avx512bw"
)))]
{
&super::VTABLE_SCALAR
}
}
}
/// Runtime backend selection for aarch64 when `std` is available.
///
/// The decision is cached in a single atomic byte rather than a vtable
/// pointer; callers branch on [`sve2_enabled`] and call the kernels directly.
#[cfg(all(feature = "std", target_arch = "aarch64"))]
mod dispatch {
    use core::sync::atomic::{AtomicU8, Ordering};

    /// Detection has not run yet.
    const UNPROBED: u8 = 0;
    /// Detection ran and SVE2 was not reported.
    const USE_NEON: u8 = 1;
    /// Detection ran and SVE2 was reported.
    const USE_SVE2: u8 = 2;

    /// Cached detection result; one of the three state constants above.
    static STATE: AtomicU8 = AtomicU8::new(UNPROBED);

    /// Probes for SVE2, records the outcome in `STATE`, and returns it.
    ///
    /// Concurrent first calls may each run the probe; every one of them
    /// stores the value derived from its own probe.
    #[cold]
    fn detect_and_store() -> u8 {
        let has_sve2 = std::arch::is_aarch64_feature_detected!("sve2");
        let chosen = if has_sve2 { USE_SVE2 } else { USE_NEON };
        STATE.store(chosen, Ordering::Relaxed);
        chosen
    }

    /// Reports whether the SVE2 kernels should be used, probing on first use.
    #[inline]
    pub(crate) fn sve2_enabled() -> bool {
        let cached = match STATE.load(Ordering::Relaxed) {
            UNPROBED => detect_and_store(),
            known => known,
        };
        cached == USE_SVE2
    }
}
/// Compile-time backend selection for aarch64 when `std` is unavailable.
#[cfg(all(not(feature = "std"), target_arch = "aarch64"))]
mod dispatch {
    /// Whether the crate was compiled with the `sve2` target feature enabled.
    const SVE2_AT_COMPILE_TIME: bool = cfg!(target_feature = "sve2");

    /// Reports whether the SVE2 kernels should be used; the answer is fixed
    /// at compile time because no runtime probing is done without `std`.
    #[inline(always)]
    pub(crate) fn sve2_enabled() -> bool {
        SVE2_AT_COMPILE_TIME
    }
}
/// Compile-time backend selection for wasm32.
///
/// Exactly one alias survives cfg evaluation: the SIMD128 table when the
/// `simd128` target feature is enabled, the scalar table otherwise. The
/// choice must stay behind `#[cfg]` because the SIMD128 table does not exist
/// without that feature.
#[cfg(target_arch = "wasm32")]
mod dispatch {
    use super::SimdVTable;

    #[cfg(target_feature = "simd128")]
    use super::VTABLE_SIMD128 as SELECTED;

    #[cfg(not(target_feature = "simd128"))]
    use super::VTABLE_SCALAR as SELECTED;

    /// Returns the backend table fixed at compile time.
    #[inline]
    pub(crate) fn get_vtable() -> &'static SimdVTable {
        &SELECTED
    }
}
/// Backend selection for every architecture without a dedicated SIMD backend
/// in this module: always the scalar table.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "wasm32",
)))]
mod dispatch {
    use super::{SimdVTable, VTABLE_SCALAR};

    /// Returns the scalar backend table.
    #[inline]
    pub(crate) fn get_vtable() -> &'static SimdVTable {
        &VTABLE_SCALAR
    }
}
/// Runs the selected backend's `scan_chunk` kernel on `ptr` with `bound` and
/// returns its 64-bit result mask.
///
/// On aarch64 the SVE2 or NEON kernel is called directly depending on
/// `dispatch::sve2_enabled()`; on every other target the kernel is taken from
/// the table returned by `dispatch::get_vtable()`.
///
/// # Safety
///
/// `ptr` is handed to the kernel unchanged, so the caller must satisfy the
/// kernel's contract. NOTE(review): `find_first_above` in this module always
/// supplies at least 64 readable bytes, which is presumably the requirement —
/// confirm against the backend implementations.
#[inline]
pub(crate) unsafe fn scan_chunk(ptr: *const u8, bound: u8) -> u64 {
    #[cfg(target_arch = "aarch64")]
    {
        if !dispatch::sve2_enabled() {
            // SAFETY: the caller's contract is forwarded unchanged.
            return unsafe { aarch64::neon::scan_chunk(ptr, bound) };
        }
        // SAFETY: the caller's contract is forwarded unchanged, and this path
        // is only taken after `sve2_enabled()` returned true.
        unsafe { aarch64::sve2::scan_chunk(ptr, bound) }
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        let kernel = dispatch::get_vtable().scan_chunk;
        // SAFETY: the caller's contract is forwarded unchanged to the kernel
        // chosen by the dispatcher.
        unsafe { kernel(ptr, bound) }
    }
}
/// Runs the selected backend's `scan_and_prefetch` kernel and returns its
/// 64-bit result mask.
///
/// All four arguments are passed through to the kernel unchanged. Backend
/// selection follows the same rules as `scan_chunk`: a direct SVE2/NEON call
/// on aarch64, a table lookup via `dispatch::get_vtable()` elsewhere.
///
/// # Safety
///
/// The caller must satisfy the selected kernel's contract for `ptr`,
/// `prefetch_l1` and `prefetch_l2`. NOTE(review): the precise requirements
/// live in the backend modules — confirm there.
// Not every build configuration calls this wrapper.
#[allow(dead_code)]
#[inline]
pub(crate) unsafe fn scan_and_prefetch(
    ptr: *const u8,
    prefetch_l1: *const u8,
    prefetch_l2: *const u8,
    bound: u8,
) -> u64 {
    #[cfg(target_arch = "aarch64")]
    {
        if !dispatch::sve2_enabled() {
            // SAFETY: the caller's contract is forwarded unchanged.
            return unsafe {
                aarch64::neon::scan_and_prefetch(ptr, prefetch_l1, prefetch_l2, bound)
            };
        }
        // SAFETY: the caller's contract is forwarded unchanged, and this path
        // is only taken after `sve2_enabled()` returned true.
        unsafe { aarch64::sve2::scan_and_prefetch(ptr, prefetch_l1, prefetch_l2, bound) }
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        let kernel = dispatch::get_vtable().scan_and_prefetch;
        // SAFETY: the caller's contract is forwarded unchanged to the kernel
        // chosen by the dispatcher.
        unsafe { kernel(ptr, prefetch_l1, prefetch_l2, bound) }
    }
}
/// Scans two chunks and returns their masks as `(mask_a, mask_b)`.
///
/// * aarch64 without SVE2: delegates to the NEON backend's fused
///   `scan_chunk_pair_and_prefetch` kernel.
/// * aarch64 with SVE2, and every other target: runs `scan_and_prefetch` on
///   `ptr_a` (with both prefetch pointers) followed by a plain `scan_chunk`
///   on `ptr_b`.
///
/// # Safety
///
/// The caller must satisfy the selected kernels' contracts for `ptr_a`,
/// `ptr_b`, `prefetch_l1` and `prefetch_l2`. NOTE(review): the precise
/// requirements live in the backend modules — confirm there.
// Not every build configuration calls this wrapper.
#[allow(dead_code)]
#[inline]
pub(crate) unsafe fn scan_pair_and_prefetch(
    ptr_a: *const u8,
    ptr_b: *const u8,
    prefetch_l1: *const u8,
    prefetch_l2: *const u8,
    bound: u8,
) -> (u64, u64) {
    #[cfg(target_arch = "aarch64")]
    {
        if !dispatch::sve2_enabled() {
            // SAFETY: the caller's contract is forwarded unchanged.
            return unsafe {
                aarch64::neon::scan_chunk_pair_and_prefetch(
                    ptr_a,
                    ptr_b,
                    prefetch_l1,
                    prefetch_l2,
                    bound,
                )
            };
        }
        // SAFETY: the caller's contract is forwarded unchanged, and this path
        // is only taken after `sve2_enabled()` returned true.
        let mask_a =
            unsafe { aarch64::sve2::scan_and_prefetch(ptr_a, prefetch_l1, prefetch_l2, bound) };
        // SAFETY: as above.
        let mask_b = unsafe { aarch64::sve2::scan_chunk(ptr_b, bound) };
        (mask_a, mask_b)
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        let table = dispatch::get_vtable();
        // SAFETY: the caller's contract is forwarded unchanged to the kernels
        // chosen by the dispatcher.
        let mask_a = unsafe { (table.scan_and_prefetch)(ptr_a, prefetch_l1, prefetch_l2, bound) };
        // SAFETY: as above.
        let mask_b = unsafe { (table.scan_chunk)(ptr_b, bound) };
        (mask_a, mask_b)
    }
}
/// Returns the index of the first byte in `bytes` that the scan reports as a
/// hit for `bound`, or `bytes.len()` when there is none.
///
/// Complete 64-byte blocks go through `scan_chunk`; the lowest set bit of a
/// non-zero mask is taken as the offset of the first hit inside that block.
/// The remaining tail (fewer than 64 bytes) is checked byte by byte with
/// `byte >= bound`, so despite the name a byte equal to `bound` counts as a
/// hit there.
// Not every build configuration calls this helper.
#[allow(dead_code)]
#[inline]
pub(crate) fn find_first_above(bytes: &[u8], bound: u8) -> usize {
    let mut blocks = bytes.chunks_exact(64);
    let mut base = 0usize;

    for block in blocks.by_ref() {
        // SAFETY: `block` is a live subslice of exactly 64 bytes, so its
        // pointer is valid for 64 bytes of reads. NOTE(review): this assumes
        // `scan_chunk` reads no more than 64 bytes — confirm against the
        // backend kernels.
        let hits = unsafe { scan_chunk(block.as_ptr(), bound) };
        if hits != 0 {
            return base + hits.trailing_zeros() as usize;
        }
        base += 64;
    }

    // Tail shorter than one block: plain linear search.
    blocks
        .remainder()
        .iter()
        .position(|&byte| byte >= bound)
        .map_or(bytes.len(), |idx| base + idx)
}
// Out-of-line test module (its body lives in a separate file); compiled only
// for test builds.
#[cfg(test)]
mod dispatch_tests;
/// Unit tests for `find_first_above` and for the stability of the dispatcher.
#[cfg(test)]
mod tests {
    use super::*;

    /// Bound used by every scan test in this module.
    const BOUND: u8 = 0xC0;

    /// Builds an `N`-byte zeroed buffer with a single `BOUND` byte at `at`.
    fn zeroed_with_marker<const N: usize>(at: usize) -> [u8; N] {
        let mut buf = [0u8; N];
        buf[at] = BOUND;
        buf
    }

    #[test]
    fn find_first_above_all_below() {
        let ascii = b"Hello, world! This is pure ASCII and should return len.";
        let found = find_first_above(ascii, BOUND);
        assert_eq!(found, ascii.len());
    }

    #[test]
    fn find_first_above_first_byte() {
        let buf = zeroed_with_marker::<4>(0);
        assert_eq!(find_first_above(&buf, BOUND), 0);
    }

    #[test]
    fn find_first_above_in_second_chunk() {
        let buf = zeroed_with_marker::<128>(65);
        assert_eq!(find_first_above(&buf, BOUND), 65);
    }

    #[test]
    fn find_first_above_in_tail() {
        let buf = zeroed_with_marker::<70>(68);
        assert_eq!(find_first_above(&buf, BOUND), 68);
    }

    #[test]
    fn find_first_above_empty() {
        let nothing: &[u8] = &[];
        assert_eq!(find_first_above(nothing, BOUND), 0);
    }

    #[test]
    fn find_first_above_exact_chunk_boundary() {
        let buf = zeroed_with_marker::<64>(63);
        assert_eq!(find_first_above(&buf, BOUND), 63);
    }

    #[test]
    fn find_first_above_multi_chunk_utf8() {
        // One full block of ASCII followed by a two-byte UTF-8 sequence.
        let text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\u{00E9}";
        assert_eq!(find_first_above(text.as_bytes(), BOUND), 64);
    }

    #[cfg(not(target_arch = "aarch64"))]
    #[test]
    fn vtable_get_returns_consistent_reference() {
        let first = dispatch::get_vtable();
        let second = dispatch::get_vtable();
        assert!(
            core::ptr::eq(first, second),
            "get_vtable() must return the same reference"
        );
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn aarch64_dispatch_state_stable() {
        let first = dispatch::sve2_enabled();
        let second = dispatch::sve2_enabled();
        assert_eq!(first, second, "sve2_enabled() must be stable across calls");
    }
}