// NOTE(review): the dispatchers in this file wrap backend calls in `unsafe` blocks, including
// the scalar backend; presumably some of those callees are safe fns on some targets, which would
// make the blocks "unused" — confirm that is why this lint is silenced.
#![allow(unused_unsafe)]
// NOTE(review): presumably not every kernel entry point is referenced on every target/feature
// combination — confirm that is why this lint is silenced.
#![allow(dead_code)]
// Scalar backend: compiled on every target.
pub(crate) mod scalar;
// NEON backend: compiled only for little-endian AArch64.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
pub(crate) mod neon;
// SSE4.2 backend: compiled only for x86 / x86_64.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) mod sse42;
// AVX2 + BMI2 backend: compiled only for x86 / x86_64.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) mod avx2_bmi2;
/// Identifies which backend implementation the fast-path dispatchers should call.
///
/// Architecture-specific variants only exist when compiling for the matching target, so an
/// exhaustive `match` on this enum must gate the corresponding arms with the same `cfg`
/// predicates.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum FastpathKernel {
    /// Scalar backend; available on every target.
    Scalar,
    /// NEON backend; only present on little-endian AArch64 builds.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    Neon,
    /// SSE4.2 backend; only present on x86 / x86_64 builds.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Sse42,
    /// AVX2 + BMI2 backend; only present on x86 / x86_64 builds.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    Avx2Bmi2,
}
/// Returns the kernel the dispatchers should use.
///
/// With the `std` feature enabled, the result of `detect_kernel_uncached` is computed on first
/// use and stored in a `std::sync::OnceLock`, so later calls return the stored value. Without
/// `std`, every call goes straight to `detect_kernel_uncached`.
#[inline]
pub(crate) fn select_kernel() -> FastpathKernel {
    // Exactly one of the two `resolve` definitions survives conditional compilation.
    #[cfg(feature = "std")]
    #[inline]
    fn resolve() -> FastpathKernel {
        static SELECTED: std::sync::OnceLock<FastpathKernel> = std::sync::OnceLock::new();
        *SELECTED.get_or_init(detect_kernel_uncached)
    }

    #[cfg(not(feature = "std"))]
    #[inline]
    fn resolve() -> FastpathKernel {
        detect_kernel_uncached()
    }

    resolve()
}
/// Picks the most capable kernel for the current target without consulting any cache.
///
/// How features are checked depends on the build:
/// - with the `std` feature, CPU features are probed at runtime;
/// - without it, only features enabled at compile time (`target_feature`) count.
///
/// On x86 / x86_64 the result is `Avx2Bmi2` when AVX2, BMI2 and SSE4.2 are all present, and
/// `Sse42` when SSE4.2 is present but AVX2 and BMI2 are not both present. On little-endian
/// AArch64 the result is `Neon` when both NEON and CRC are present. Every other case falls
/// back to `Scalar`.
#[inline]
fn detect_kernel_uncached() -> FastpathKernel {
    // x86 / x86_64 with `std`: runtime probing.
    #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
    {
        // SSE4.2 is required by both x86 kernels, so test it first.
        if std::is_x86_feature_detected!("sse4.2") {
            let has_avx2_bmi2 =
                std::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("bmi2");
            return if has_avx2_bmi2 {
                FastpathKernel::Avx2Bmi2
            } else {
                FastpathKernel::Sse42
            };
        }
    }

    // Little-endian AArch64 with `std`: runtime probing.
    #[cfg(all(feature = "std", target_arch = "aarch64", target_endian = "little"))]
    {
        let neon_with_crc = std::arch::is_aarch64_feature_detected!("neon")
            && std::arch::is_aarch64_feature_detected!("crc");
        if neon_with_crc {
            return FastpathKernel::Neon;
        }
    }

    // x86 / x86_64 without `std`: rely on compile-time target features only.
    #[cfg(all(not(feature = "std"), any(target_arch = "x86", target_arch = "x86_64")))]
    {
        if cfg!(target_feature = "sse4.2") {
            let has_avx2_bmi2 = cfg!(target_feature = "avx2") && cfg!(target_feature = "bmi2");
            return if has_avx2_bmi2 {
                FastpathKernel::Avx2Bmi2
            } else {
                FastpathKernel::Sse42
            };
        }
    }

    // Little-endian AArch64 without `std`: rely on compile-time target features only.
    #[cfg(all(
        not(feature = "std"),
        target_arch = "aarch64",
        target_endian = "little"
    ))]
    {
        let neon_with_crc = cfg!(target_feature = "neon") && cfg!(target_feature = "crc");
        if neon_with_crc {
            return FastpathKernel::Neon;
        }
    }

    // No accelerated kernel applies.
    FastpathKernel::Scalar
}
/// Forwards to the `count_match_from_indices` implementation of the kernel returned by
/// `select_kernel`, passing all arguments through unchanged.
///
/// NOTE(review): this is a safe function that calls the backends inside `unsafe` blocks without
/// validating `current_idx`, `candidate_idx`, `tail_limit` or `seed_len` against `concat` here —
/// presumably the backends (or the callers) guarantee the indices are in bounds; confirm.
#[inline]
pub(crate) fn dispatch_count_match_from_indices(
    concat: &[u8],
    current_idx: usize,
    candidate_idx: usize,
    tail_limit: usize,
    seed_len: usize,
) -> usize {
    // Every backend is called with the same argument list; only the module differs.
    macro_rules! run_backend {
        ($backend:ident) => {
            // SAFETY (partly assumed — confirm): an architecture-specific arm is only reached
            // when `select_kernel` reported that kernel, i.e. after its CPU features were
            // detected. Any index/bounds preconditions of the backend are not checked here.
            unsafe {
                $backend::count_match_from_indices(
                    concat,
                    current_idx,
                    candidate_idx,
                    tail_limit,
                    seed_len,
                )
            }
        };
    }

    match select_kernel() {
        FastpathKernel::Scalar => run_backend!(scalar),
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        FastpathKernel::Neon => run_backend!(neon),
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        FastpathKernel::Sse42 => run_backend!(sse42),
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        FastpathKernel::Avx2Bmi2 => run_backend!(avx2_bmi2),
    }
}
/// Runs the `hash_mix_u64` routine of the given `kernel` on `value` and returns its result.
///
/// Taking the kernel as a parameter lets a caller resolve it once and reuse it, instead of
/// going through `select_kernel` for every value.
///
/// NOTE(review): this function is safe but calls the architecture-specific backends inside
/// `unsafe`; presumably `kernel` must be one the running CPU actually supports (e.g. a value
/// obtained from `select_kernel`) — confirm all callers uphold that.
#[inline(always)]
pub(crate) fn hash_mix_u64_with_kernel(kernel: FastpathKernel, value: u64) -> u64 {
    match kernel {
        // The scalar routine is called without `unsafe`.
        FastpathKernel::Scalar => scalar::hash_mix_u64(value),
        #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
        FastpathKernel::Neon => {
            // SAFETY (assumed — confirm): the caller passed a kernel supported by this CPU.
            unsafe { neon::hash_mix_u64(value) }
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        FastpathKernel::Sse42 => {
            // SAFETY (assumed — confirm): the caller passed a kernel supported by this CPU.
            unsafe { sse42::hash_mix_u64(value) }
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        FastpathKernel::Avx2Bmi2 => {
            // SAFETY (assumed — confirm): the caller passed a kernel supported by this CPU.
            unsafe { avx2_bmi2::hash_mix_u64(value) }
        }
    }
}
/// Mixes `value` with the `hash_mix_u64` routine of the kernel returned by `select_kernel`.
#[inline]
pub(crate) fn dispatch_hash_mix_u64(value: u64) -> u64 {
    let kernel = select_kernel();
    hash_mix_u64_with_kernel(kernel, value)
}
/// Forwards to the `common_prefix_len_ptr` implementation of the kernel returned by
/// `select_kernel`, passing `lhs`, `rhs` and `max` through unchanged.
///
/// # Safety
///
/// The caller must satisfy whatever the backend `common_prefix_len_ptr` functions require of
/// their pointer arguments; nothing is validated here.
/// NOTE(review): presumably `lhs` and `rhs` must each be valid for reads of `max` bytes —
/// confirm against the backend implementations.
#[inline]
pub(crate) unsafe fn dispatch_common_prefix_len_ptr(
    lhs: *const u8,
    rhs: *const u8,
    max: usize,
) -> usize {
    let kernel = select_kernel();
    // SAFETY: pointer requirements are forwarded to our caller (see `# Safety`). An
    // architecture-specific arm is only reached when `select_kernel` reported that kernel.
    unsafe {
        match kernel {
            FastpathKernel::Scalar => scalar::common_prefix_len_ptr(lhs, rhs, max),
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            FastpathKernel::Neon => neon::common_prefix_len_ptr(lhs, rhs, max),
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            FastpathKernel::Sse42 => sse42::common_prefix_len_ptr(lhs, rhs, max),
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            FastpathKernel::Avx2Bmi2 => avx2_bmi2::common_prefix_len_ptr(lhs, rhs, max),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{FastpathKernel, detect_kernel_uncached, select_kernel};

    /// `select_kernel` must agree with a fresh, uncached detection, and the value must be one
    /// of the variants compiled for this target.
    #[test]
    fn select_kernel_returns_supported_variant() {
        let selected = select_kernel();
        let detected = detect_kernel_uncached();
        assert_eq!(selected, detected);

        // Exhaustive on purpose: adding a variant without updating this test fails to compile.
        match selected {
            FastpathKernel::Scalar => {}
            #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
            FastpathKernel::Neon => {}
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            FastpathKernel::Sse42 => {}
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            FastpathKernel::Avx2Bmi2 => {}
        }
    }

    /// On little-endian AArch64, detection should yield `Neon` when CRC is available and
    /// `Scalar` otherwise.
    ///
    /// NOTE(review): the expectation is derived from CRC alone, while detection also requires
    /// NEON — presumably NEON is taken for granted on the targets this runs on; confirm.
    #[cfg(all(target_arch = "aarch64", target_endian = "little"))]
    #[test]
    fn aarch64_picks_neon_when_crc_available() {
        // Mirror the detection mode of the build: runtime probe with `std`, compile-time
        // target feature otherwise.
        #[cfg(feature = "std")]
        let has_crc = std::arch::is_aarch64_feature_detected!("crc");
        #[cfg(not(feature = "std"))]
        let has_crc = cfg!(target_feature = "crc");

        let want = if has_crc {
            FastpathKernel::Neon
        } else {
            FastpathKernel::Scalar
        };
        assert_eq!(detect_kernel_uncached(), want);
    }
}