use std::sync::OnceLock;
/// Snapshot of the SIMD instruction-set extensions available to this process.
///
/// Every flag defaults to `false`; [`SimdCapabilities::detect`] fills in the
/// ones that apply to the architecture the crate was compiled for.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
// Each flag is an independent CPU feature rather than encoded state, so a
// plain struct of bools is the clearest representation.
#[allow(clippy::struct_excessive_bools)]
pub struct SimdCapabilities {
    /// AVX2 was detected (only ever set by `detect` on x86 / x86_64).
    pub avx2: bool,
    /// FMA was detected (only ever set by `detect` on x86 / x86_64).
    pub fma: bool,
    /// SSE4.2 was detected (only ever set by `detect` on x86 / x86_64).
    pub sse42: bool,
    /// NEON was detected (only ever set by `detect` on aarch64).
    pub neon: bool,
}

impl SimdCapabilities {
    /// Probes the current CPU and reports which SIMD extensions it offers.
    ///
    /// * x86 / x86_64: queries `avx2`, `fma` and `sse4.2`; `neon` stays `false`.
    /// * aarch64: queries `neon`; the x86 flags stay `false`.
    /// * wasm32 and every other architecture: all flags are `false`.
    #[must_use]
    pub fn detect() -> Self {
        // The feature-detection macros only exist on their own architecture,
        // so each probe has to live behind `#[cfg]` rather than `cfg!`.
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            Self {
                avx2: is_x86_feature_detected!("avx2"),
                fma: is_x86_feature_detected!("fma"),
                sse42: is_x86_feature_detected!("sse4.2"),
                ..Self::default()
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            Self {
                neon: std::arch::is_aarch64_feature_detected!("neon"),
                ..Self::default()
            }
        }
        #[cfg(not(any(
            target_arch = "x86_64",
            target_arch = "x86",
            target_arch = "aarch64"
        )))]
        {
            Self::default()
        }
    }

    /// Returns `true` when the fastest code path for the compiled
    /// architecture can be used.
    ///
    /// * x86 / x86_64: requires both AVX2 and FMA.
    /// * aarch64: requires NEON.
    /// * anything else: always `true`.
    #[must_use]
    pub fn is_optimal(&self) -> bool {
        if cfg!(any(target_arch = "x86_64", target_arch = "x86")) {
            self.avx2 && self.fma
        } else if cfg!(target_arch = "aarch64") {
            self.neon
        } else {
            true
        }
    }

    /// Builds a human-readable warning describing the missing x86 features.
    ///
    /// Returns `None` when [`is_optimal`](Self::is_optimal) holds. On
    /// non-x86 targets it always returns `None`, even when `is_optimal()` is
    /// `false` (for example aarch64 without NEON).
    #[must_use]
    pub fn performance_warning(&self) -> Option<String> {
        let on_x86 = cfg!(any(target_arch = "x86_64", target_arch = "x86"));
        if !on_x86 || self.is_optimal() {
            return None;
        }

        // Keep the AVX2-then-FMA order so the message text is stable.
        let absent: Vec<&str> = [("AVX2", self.avx2), ("FMA", self.fma)]
            .iter()
            .filter(|(_, present)| !present)
            .map(|(label, _)| *label)
            .collect();

        Some(format!(
            "EdgeVec: Suboptimal SIMD configuration detected. Missing: {}. \
             Expected 60-78% performance loss. \
             Add `rustflags = [\"-C\", \"target-cpu=native\"]` to .cargo/config.toml",
            absent.join(", ")
        ))
    }
}
/// Process-wide cache holding the result of the first capability probe.
static CAPABILITIES: OnceLock<SimdCapabilities> = OnceLock::new();

/// Returns the cached SIMD capabilities of this process.
///
/// The first call runs [`SimdCapabilities::detect`]; every later call hands
/// back a reference to the same stored value.
#[must_use]
pub fn capabilities() -> &'static SimdCapabilities {
    OnceLock::get_or_init(&CAPABILITIES, SimdCapabilities::detect)
}
/// Prints the performance warning for the cached capabilities to stderr.
///
/// Does nothing when [`SimdCapabilities::performance_warning`] yields `None`.
pub fn warn_if_suboptimal() {
    let Some(message) = capabilities().performance_warning() else {
        return;
    };
    eprintln!("{message}");
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection must be repeatable and must never report a feature that
    /// belongs to a different architecture than the one compiled for.
    #[test]
    fn test_detect_returns_valid_capabilities() {
        let caps = SimdCapabilities::detect();
        assert_eq!(caps, SimdCapabilities::detect());

        if cfg!(any(target_arch = "x86_64", target_arch = "x86")) {
            assert!(!caps.neon);
        } else if cfg!(target_arch = "aarch64") {
            assert!(!caps.avx2);
            assert!(!caps.fma);
            assert!(!caps.sse42);
        } else {
            assert_eq!(caps, SimdCapabilities::default());
        }
    }

    /// The derived `Default` reports no features at all.
    #[test]
    fn test_default_all_false() {
        let caps = SimdCapabilities::default();
        assert!(!caps.avx2);
        assert!(!caps.fma);
        assert!(!caps.sse42);
        assert!(!caps.neon);
    }

    /// The accessor hands out one shared value that matches a fresh probe.
    #[test]
    fn test_capabilities_returns_same_instance() {
        let caps1 = capabilities();
        let caps2 = capabilities();
        assert!(std::ptr::eq(caps1, caps2));
        assert_eq!(*caps1, SimdCapabilities::detect());
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_is_optimal_with_full_x86_features() {
        let caps = SimdCapabilities {
            avx2: true,
            fma: true,
            sse42: true,
            neon: false,
        };
        assert!(caps.is_optimal());
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_is_optimal_missing_avx2() {
        let caps = SimdCapabilities {
            avx2: false,
            fma: true,
            sse42: true,
            neon: false,
        };
        assert!(!caps.is_optimal());
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_is_optimal_missing_fma() {
        let caps = SimdCapabilities {
            avx2: true,
            fma: false,
            sse42: true,
            neon: false,
        };
        assert!(!caps.is_optimal());
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_performance_warning_none_when_optimal() {
        let caps = SimdCapabilities {
            avx2: true,
            fma: true,
            sse42: true,
            neon: false,
        };
        assert!(caps.performance_warning().is_none());
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_performance_warning_present_when_suboptimal() {
        let caps = SimdCapabilities {
            avx2: false,
            fma: false,
            sse42: true,
            neon: false,
        };
        let msg = caps
            .performance_warning()
            .expect("missing AVX2 and FMA must produce a warning");
        assert!(msg.contains("AVX2"));
        assert!(msg.contains("FMA"));
        assert!(msg.contains("60-78%"));
        assert!(msg.contains("target-cpu=native"));
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_performance_warning_missing_only_avx2() {
        let caps = SimdCapabilities {
            avx2: false,
            fma: true,
            sse42: true,
            neon: false,
        };
        let msg = caps
            .performance_warning()
            .expect("missing AVX2 must produce a warning");
        assert!(msg.contains("AVX2"));
        assert!(!msg.contains("FMA"));
    }

    /// Mirror of the AVX2-only case: only FMA may be named as missing.
    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn test_performance_warning_missing_only_fma() {
        let caps = SimdCapabilities {
            avx2: true,
            fma: false,
            sse42: true,
            neon: false,
        };
        let msg = caps
            .performance_warning()
            .expect("missing FMA must produce a warning");
        assert!(msg.contains("FMA"));
        assert!(!msg.contains("AVX2"));
    }

    #[test]
    fn test_warn_if_suboptimal_does_not_panic() {
        warn_if_suboptimal();
    }

    #[test]
    // `SimdCapabilities` is `Copy`, so clippy would flag the explicit clone;
    // it is deliberate here because the derived `Clone` impl is under test.
    #[allow(clippy::clone_on_copy)]
    fn test_simd_capabilities_clone() {
        let caps = SimdCapabilities {
            avx2: true,
            fma: false,
            sse42: true,
            neon: false,
        };
        let cloned = caps.clone();
        assert_eq!(caps, cloned);
    }

    /// The derived `Debug` output names the type and every field.
    #[test]
    fn test_simd_capabilities_debug() {
        let caps = SimdCapabilities::default();
        let debug_str = format!("{caps:?}");
        assert!(debug_str.contains("SimdCapabilities"));
        assert!(debug_str.contains("avx2"));
        assert!(debug_str.contains("fma"));
        assert!(debug_str.contains("sse42"));
        assert!(debug_str.contains("neon"));
    }
}