#[cfg(feature = "std")]
use std::sync::OnceLock;
pub trait CpuKernel: Copy + 'static {
fn mask_lower_bits(value: u64, n: u8) -> u64;
}
#[derive(Copy, Clone, Default)]
pub struct ScalarKernel;
impl CpuKernel for ScalarKernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
let mask = u64::MAX
.checked_shr(64u32.wrapping_sub(n as u32))
.unwrap_or(0);
value & mask
}
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
#[derive(Copy, Clone, Default)]
pub(crate) struct Bmi2Kernel;
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
impl CpuKernel for Bmi2Kernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
unsafe { mask_lower_bits_bmi2_impl(value, n) }
}
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
#[derive(Copy, Clone, Default)]
pub(crate) struct Avx2Kernel;
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
impl CpuKernel for Avx2Kernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
unsafe { mask_lower_bits_bmi2_impl(value, n) }
}
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
#[derive(Copy, Clone, Default)]
pub(crate) struct Vbmi2Kernel;
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
impl CpuKernel for Vbmi2Kernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
unsafe { mask_lower_bits_bmi2_impl(value, n) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "kernel_neon"))]
#[allow(dead_code)]
#[derive(Copy, Clone, Default)]
pub(crate) struct NeonKernel;
#[cfg(all(target_arch = "aarch64", feature = "kernel_neon"))]
impl CpuKernel for NeonKernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
ScalarKernel::mask_lower_bits(value, n)
}
}
#[cfg(all(target_arch = "aarch64", feature = "kernel_sve"))]
#[allow(dead_code)]
#[derive(Copy, Clone, Default)]
pub(crate) struct SveKernel;
#[cfg(all(target_arch = "aarch64", feature = "kernel_sve"))]
impl CpuKernel for SveKernel {
#[inline(always)]
fn mask_lower_bits(value: u64, n: u8) -> u64 {
ScalarKernel::mask_lower_bits(value, n)
}
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
#[target_feature(enable = "bmi2")]
#[inline]
unsafe fn mask_lower_bits_bmi2_impl(value: u64, n: u8) -> u64 {
core::arch::x86_64::_bzhi_u64(value, n as u32)
}
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[allow(unused_variables)]
const fn select_x86_kernel(
has_avx512vbmi2: bool,
has_avx512f: bool,
has_avx512vl: bool,
has_avx512bw: bool,
has_bmi2: bool,
has_avx2: bool,
has_sse2: bool,
) -> CpuKernelTag {
#[cfg(feature = "kernel_vbmi2")]
if has_avx512vbmi2 && has_avx512f && has_avx512vl && has_avx512bw && has_bmi2 && has_avx2 {
return CpuKernelTag::Vbmi2;
}
#[cfg(feature = "kernel_avx2")]
if has_avx2 && has_bmi2 {
return CpuKernelTag::Avx2;
}
#[cfg(feature = "kernel_bmi2")]
if has_bmi2 {
return CpuKernelTag::Bmi2;
}
#[cfg(feature = "kernel_sse2")]
if has_sse2 {
return CpuKernelTag::Sse2;
}
CpuKernelTag::Scalar
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum CpuKernelTag {
Scalar,
#[cfg(all(target_arch = "x86_64", feature = "kernel_sse2"))]
Sse2,
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
Bmi2,
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
Avx2,
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
Vbmi2,
#[cfg(all(target_arch = "aarch64", feature = "kernel_neon"))]
Neon,
#[cfg(all(
target_arch = "aarch64",
feature = "kernel_sve",
any(feature = "std", target_feature = "sve"),
))]
Sve,
}
#[cfg(feature = "std")]
pub(crate) fn detect_cpu_kernel() -> CpuKernelTag {
static CACHED: OnceLock<CpuKernelTag> = OnceLock::new();
*CACHED.get_or_init(detect_cpu_kernel_uncached)
}
#[cfg(feature = "std")]
fn detect_cpu_kernel_uncached() -> CpuKernelTag {
#[cfg(target_arch = "x86_64")]
{
use std::arch::is_x86_feature_detected;
return select_x86_kernel(
cfg!(feature = "kernel_vbmi2") && is_x86_feature_detected!("avx512vbmi2"),
cfg!(feature = "kernel_vbmi2") && is_x86_feature_detected!("avx512f"),
cfg!(feature = "kernel_vbmi2") && is_x86_feature_detected!("avx512vl"),
cfg!(feature = "kernel_vbmi2") && is_x86_feature_detected!("avx512bw"),
cfg!(feature = "kernel_bmi2") && is_x86_feature_detected!("bmi2"),
cfg!(feature = "kernel_avx2") && is_x86_feature_detected!("avx2"),
cfg!(feature = "kernel_sse2") && is_x86_feature_detected!("sse2"),
);
}
#[cfg(target_arch = "aarch64")]
{
#[cfg(any(feature = "kernel_sve", feature = "kernel_neon"))]
use std::arch::is_aarch64_feature_detected;
#[cfg(feature = "kernel_sve")]
if is_aarch64_feature_detected!("sve") {
return CpuKernelTag::Sve;
}
#[cfg(feature = "kernel_neon")]
if is_aarch64_feature_detected!("neon") {
return CpuKernelTag::Neon;
}
return CpuKernelTag::Scalar;
}
#[allow(unreachable_code)]
CpuKernelTag::Scalar
}
#[cfg(not(feature = "std"))]
pub(crate) fn detect_cpu_kernel() -> CpuKernelTag {
#[cfg(target_arch = "x86_64")]
{
return select_x86_kernel(
cfg!(target_feature = "avx512vbmi2"),
cfg!(target_feature = "avx512f"),
cfg!(target_feature = "avx512vl"),
cfg!(target_feature = "avx512bw"),
cfg!(target_feature = "bmi2"),
cfg!(target_feature = "avx2"),
cfg!(target_feature = "sse2"),
);
}
#[cfg(target_arch = "aarch64")]
{
#[cfg(all(feature = "kernel_sve", target_feature = "sve"))]
{
return CpuKernelTag::Sve;
}
#[cfg(all(feature = "kernel_neon", target_feature = "neon"))]
{
return CpuKernelTag::Neon;
}
}
#[allow(unreachable_code)]
CpuKernelTag::Scalar
}
impl CpuKernelTag {
pub(crate) fn name(self) -> &'static str {
match self {
CpuKernelTag::Scalar => "scalar",
#[cfg(all(target_arch = "x86_64", feature = "kernel_sse2"))]
CpuKernelTag::Sse2 => "sse2",
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
CpuKernelTag::Bmi2 => "bmi2",
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
CpuKernelTag::Avx2 => "avx2",
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
CpuKernelTag::Vbmi2 => "vbmi2",
#[cfg(all(target_arch = "aarch64", feature = "kernel_neon"))]
CpuKernelTag::Neon => "neon",
#[cfg(all(
target_arch = "aarch64",
feature = "kernel_sve",
any(feature = "std", target_feature = "sve"),
))]
CpuKernelTag::Sve => "sve",
}
}
}
pub fn active_cpu_kernel_name() -> &'static str {
detect_cpu_kernel().name()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn scalar_mask_lower_bits_zero_n_returns_zero() {
assert_eq!(ScalarKernel::mask_lower_bits(0xDEADBEEF, 0), 0);
}
#[test]
fn scalar_mask_lower_bits_full_64_returns_full_value() {
assert_eq!(
ScalarKernel::mask_lower_bits(0xFFFF_FFFF_FFFF_FFFF, 64),
0xFFFF_FFFF_FFFF_FFFF
);
}
#[test]
fn scalar_mask_lower_bits_mid_keeps_low_n_bits() {
assert_eq!(ScalarKernel::mask_lower_bits(0xDEAD_BEEF, 8), 0xEF);
assert_eq!(
ScalarKernel::mask_lower_bits(0x0102_0304_0506_0708, 16),
0x0708
);
}
#[cfg(all(target_arch = "x86_64", feature = "std", feature = "kernel_avx2"))]
#[test]
fn avx2_mask_lower_bits_matches_scalar_on_bmi2_hw() {
if !std::arch::is_x86_feature_detected!("bmi2") {
return;
}
for n in 0..=64u8 {
let v = 0x1234_5678_9ABC_DEF0u64;
assert_eq!(
Avx2Kernel::mask_lower_bits(v, n),
ScalarKernel::mask_lower_bits(v, n),
"mismatch at n={}",
n
);
}
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
#[test]
fn select_x86_kernel_vbmi2_without_avx2_does_not_pick_vbmi2() {
let tag = select_x86_kernel(
true, true, true,
true, true, false,
true,
);
assert_ne!(
tag,
CpuKernelTag::Vbmi2,
"selecting Vbmi2 without AVX2 would call AVX2 instructions and SIGILL"
);
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
#[test]
fn select_x86_kernel_full_x86_v4_picks_vbmi2() {
let tag = select_x86_kernel(true, true, true, true, true, true, true);
assert_eq!(tag, CpuKernelTag::Vbmi2);
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
#[test]
fn select_x86_kernel_avx2_baseline_picks_avx2() {
let tag = select_x86_kernel(false, false, false, false, true, true, true);
assert_eq!(tag, CpuKernelTag::Avx2);
}
#[cfg(all(target_arch = "x86_64", feature = "kernel_sse2"))]
#[test]
fn select_x86_kernel_sse2_only_picks_sse2() {
let tag = select_x86_kernel(false, false, false, false, false, false, true);
assert_eq!(tag, CpuKernelTag::Sse2);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn select_x86_kernel_no_features_picks_scalar() {
let tag = select_x86_kernel(false, false, false, false, false, false, false);
assert_eq!(tag, CpuKernelTag::Scalar);
}
#[test]
fn detect_returns_consistent_tag() {
let first = detect_cpu_kernel();
let second = detect_cpu_kernel();
assert_eq!(
first, second,
"cached detect must return same tag on repeated calls"
);
}
#[test]
fn active_kernel_name_is_known_lowercase_tier() {
const KNOWN: &[&str] = &["scalar", "sse2", "bmi2", "avx2", "vbmi2", "neon", "sve"];
let name = active_cpu_kernel_name();
assert!(
KNOWN.contains(&name),
"active kernel name {name:?} is not a recognised tier"
);
assert_eq!(
name,
name.to_ascii_lowercase(),
"tier name must be lowercase for stable dashboard parsing"
);
}
#[test]
fn every_kernel_tag_maps_to_its_lowercase_name() {
assert_eq!(CpuKernelTag::Scalar.name(), "scalar");
#[cfg(all(target_arch = "x86_64", feature = "kernel_sse2"))]
assert_eq!(CpuKernelTag::Sse2.name(), "sse2");
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
assert_eq!(CpuKernelTag::Bmi2.name(), "bmi2");
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
assert_eq!(CpuKernelTag::Avx2.name(), "avx2");
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
assert_eq!(CpuKernelTag::Vbmi2.name(), "vbmi2");
#[cfg(all(target_arch = "aarch64", feature = "kernel_neon"))]
assert_eq!(CpuKernelTag::Neon.name(), "neon");
#[cfg(all(
target_arch = "aarch64",
feature = "kernel_sve",
any(feature = "std", target_feature = "sve"),
))]
assert_eq!(CpuKernelTag::Sve.name(), "sve");
}
#[test]
fn active_kernel_name_is_stable_across_calls() {
assert_eq!(active_cpu_kernel_name(), active_cpu_kernel_name());
}
}