pub mod activation;
pub mod basic;
pub mod fma;
pub mod i8_ops;
pub mod matrix;
pub mod mobile;
pub mod reductions;
pub mod server;
pub mod sve;
pub use basic::{
neon_add_f32, neon_add_f64, neon_div_f32, neon_dot_f32, neon_dot_f64, neon_mul_f32,
neon_mul_f64, neon_sub_f32, neon_sub_f64,
};
pub use matrix::{neon_gemm_f32, neon_gemm_f64, neon_gemv_f32, neon_gemv_f64};
pub use activation::{
neon_gelu_f32, neon_leaky_relu_f32, neon_relu_f32, neon_sigmoid_f32, neon_tanh_f32,
};
pub use mobile::{
neon_dot_battery_optimized, neon_gemm_battery_optimized, neon_gemm_thermal_aware, BatteryMode,
MobileOptimizer, ThermalState,
};
pub use fma::{
neon_abs_f32, neon_abs_f64, neon_fmadd_f32, neon_fmadd_f64, neon_neg_f32, neon_scale_f32,
neon_scale_f64,
};
pub use reductions::{
neon_max_f32, neon_max_f64, neon_min_f32, neon_min_f64, neon_sum_f32, neon_sum_f64,
};
pub use server::{
neon_add_f32_unrolled, neon_batch_norm_f32_server, neon_dot_f32_unrolled,
neon_dot_f64_unrolled, neon_matmul_f32,
};
pub use i8_ops::{
neon_add_i8_saturating, neon_dequantize_i8_to_f32, neon_dot_i8, neon_dot_i8_i32acc,
neon_quantize_f32_to_i8,
};
pub use sve::{
detect_sve_capabilities, has_sve, has_sve2, sve_add_f32, sve_dot_f32, sve_scale_f32,
sve_sum_f32, SveCapabilities,
};
/// Snapshot of the SIMD features reported for the current ARM target.
///
/// Values of this type are built by [`detect_arm_capabilities`]. On targets that are
/// neither AArch64 nor 32-bit ARM every flag is `false` and both vector widths are `1`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArmSimdCapabilities {
    /// `true` when NEON is reported as available.
    pub has_neon: bool,

    /// `true` when SVE is reported by the SVE detection (always `false` off AArch64).
    pub has_sve: bool,

    /// `true` when SVE2 is reported by the SVE detection (always `false` off AArch64).
    pub has_sve2: bool,

    /// `true` when the `dotprod` feature is detected (always `false` off AArch64).
    pub has_dotprod: bool,

    /// `true` when the `bf16` feature is detected (always `false` off AArch64).
    pub has_bf16: bool,

    /// Number of `f32` lanes per vector: the SVE vector length divided by 4 when SVE is
    /// in use, `4` with NEON, `1` otherwise.
    pub vector_width_f32: usize,

    /// Number of `f64` lanes per vector: the SVE vector length divided by 8 when SVE is
    /// in use, `2` with NEON, `1` otherwise.
    pub vector_width_f64: usize,

    /// SVE vector length in bytes as reported by the SVE detection; `0` on targets other
    /// than AArch64.
    pub sve_vector_len_bytes: usize,

    /// Heuristic flag, set when both `bf16` and `dotprod` are detected. It is only a
    /// hint, not a definitive identification of the CPU vendor.
    pub is_apple_silicon_hint: bool,
}
impl Default for ArmSimdCapabilities {
    /// Builds the default value by probing the current target.
    ///
    /// This simply forwards to [`detect_arm_capabilities`], so the "default" is whatever
    /// the detection reports rather than an all-zero value.
    fn default() -> Self {
        detect_arm_capabilities()
    }
}
/// Probes the current target and reports which ARM SIMD features can be used.
///
/// * **AArch64** - NEON, `dotprod` and `bf16` are queried at runtime with
///   `std::arch::is_aarch64_feature_detected!`; the SVE flags and vector length come
///   from [`detect_sve_capabilities`]. The lane counts follow the SVE vector length when
///   SVE is present with a non-zero length, fall back to `(4, 2)` with NEON and to
///   `(1, 1)` otherwise.
/// * **32-bit ARM** - NEON is assumed to be present without any runtime probing, with
///   lane counts `(4, 2)`.
/// * **Any other architecture** - no feature is reported and both lane counts are `1`.
pub fn detect_arm_capabilities() -> ArmSimdCapabilities {
    #[cfg(target_arch = "aarch64")]
    {
        let neon = std::arch::is_aarch64_feature_detected!("neon");
        let dotprod = std::arch::is_aarch64_feature_detected!("dotprod");
        let bf16 = std::arch::is_aarch64_feature_detected!("bf16");

        let sve = detect_sve_capabilities();
        let sve_bytes = sve.vector_len_bytes;

        // Widest usable unit wins: SVE (when it reports a length), then NEON, then scalar.
        let (lanes_f32, lanes_f64) = match (sve.has_sve && sve_bytes > 0, neon) {
            (true, _) => (sve_bytes / 4, sve_bytes / 8),
            (false, true) => (4, 2),
            (false, false) => (1, 1),
        };

        ArmSimdCapabilities {
            has_neon: neon,
            has_sve: sve.has_sve,
            has_sve2: sve.has_sve2,
            has_dotprod: dotprod,
            has_bf16: bf16,
            vector_width_f32: lanes_f32,
            vector_width_f64: lanes_f64,
            sve_vector_len_bytes: sve_bytes,
            // Heuristic only: both features being present is treated as a hint.
            is_apple_silicon_hint: bf16 && dotprod,
        }
    }

    #[cfg(not(target_arch = "aarch64"))]
    {
        // Without AArch64 only two outcomes remain: 32-bit ARM (NEON assumed present)
        // or a non-ARM target (nothing available, scalar widths).
        let arm32 = cfg!(target_arch = "arm");
        let (lanes_f32, lanes_f64) = if arm32 { (4, 2) } else { (1, 1) };

        ArmSimdCapabilities {
            has_neon: arm32,
            has_sve: false,
            has_sve2: false,
            has_dotprod: false,
            has_bf16: false,
            vector_width_f32: lanes_f32,
            vector_width_f64: lanes_f64,
            sve_vector_len_bytes: 0,
            is_apple_silicon_hint: false,
        }
    }
}
/// Short alias for [`is_neon_available`].
///
/// Returns exactly what [`is_neon_available`] returns; it exists so callers can use
/// the same `has_*` naming as the other feature queries.
#[inline]
pub fn has_neon() -> bool {
    is_neon_available()
}
/// Reports whether NEON can be used on the current target.
///
/// * AArch64: the answer comes from runtime detection of the `neon` feature.
/// * 32-bit ARM: always `true` (no runtime probing is performed).
/// * Any other architecture: always `false`.
#[inline]
pub fn is_neon_available() -> bool {
    #[cfg(target_arch = "aarch64")]
    {
        std::arch::is_aarch64_feature_detected!("neon")
    }

    #[cfg(not(target_arch = "aarch64"))]
    {
        // Off AArch64 the result is fixed at compile time: `true` only for 32-bit ARM.
        cfg!(target_arch = "arm")
    }
}
/// Returns the SIMD capabilities of the current target.
///
/// Convenience entry point that forwards to [`detect_arm_capabilities`]; the detection
/// is performed on every call.
#[inline]
pub fn simd_capabilities() -> ArmSimdCapabilities {
    detect_arm_capabilities()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_neon_available` must agree with what each supported target guarantees.
    #[test]
    fn test_neon_detection() {
        let available = is_neon_available();
        #[cfg(target_arch = "aarch64")]
        {
            assert!(available, "NEON should be available on AArch64");
        }
        // 32-bit ARM is covered explicitly so `available` is checked (and used) on
        // every target instead of being silently skipped there.
        #[cfg(target_arch = "arm")]
        {
            assert!(available, "NEON should be reported as available on 32-bit ARM");
        }
        #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
        {
            assert!(!available, "NEON should not be available on non-ARM");
        }
    }

    /// `has_neon` is a pure alias and must never diverge from `is_neon_available`.
    #[test]
    fn test_has_neon_alias() {
        assert_eq!(has_neon(), is_neon_available());
    }

    /// The capability struct and the standalone query must report the same NEON state.
    #[test]
    fn test_capabilities_neon_matches_is_neon_available() {
        assert_eq!(simd_capabilities().has_neon, is_neon_available());
    }

    /// Basic invariants that hold on every target.
    #[test]
    fn test_simd_capabilities_returns_valid_struct() {
        let caps = simd_capabilities();
        assert!(caps.vector_width_f32 >= 1);
        assert!(caps.vector_width_f64 >= 1);
        if !caps.has_sve {
            assert_eq!(caps.sve_vector_len_bytes, 0);
        }
        if caps.has_sve2 {
            assert!(caps.has_sve, "SVE2 implies SVE");
        }
    }

    /// Cross-field consistency of the detected capabilities.
    #[test]
    fn test_detect_arm_capabilities_consistency() {
        let caps = detect_arm_capabilities();
        if caps.vector_width_f32 >= 4 && !caps.has_sve {
            assert!(caps.has_neon, "4-wide f32 requires NEON");
        }
        if caps.has_sve {
            assert!(
                caps.sve_vector_len_bytes >= 16,
                "SVE VLEN must be >= 16 bytes"
            );
            assert_eq!(
                caps.sve_vector_len_bytes % 16,
                0,
                "SVE VLEN must be a multiple of 16 bytes"
            );
        }
    }
}