// Private implementation module. Its source lives one directory up
// (`../simd_impl.rs`) rather than next to this file, hence the explicit
// `#[path]` override.
#[path = "../simd_impl.rs"]
mod simd_impl;

// Public submodules, one declaration per line.
pub mod detect;
pub mod traits;

// Compiled only when targeting `aarch64` or `arm`; on every other
// architecture this module does not exist.
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
pub mod neon;

pub mod arithmetic;
pub mod basic;
pub mod basic_optimized;
pub mod dot;
pub mod gemm;
pub mod reductions;
pub mod distances;
pub mod norms;
pub mod similarity;
pub mod weighted;
pub mod activation;
pub mod cumulative;
pub mod indexing;
pub mod normalization;
pub mod preprocessing;
pub mod rounding;
pub mod transcendental;
pub mod transpose;
pub mod unary;
pub mod unary_powi;
// Flat public surface: each `pub use` below lifts selected items out of a
// submodule so they can be named directly from this module.

// From `gemm`.
pub use gemm::{blocked_gemm_f32, blocked_gemm_f64, should_use_blocked, MatMulConfig};

// From `detect`.
pub use detect::{detect_simd_capabilities, get_cpu_features, CpuFeatures, SimdCapabilities};

// From `traits`.
pub use traits::SimdOps;

// From `basic`.
pub use basic::{
    simd_add_aligned_ultra, simd_add_f32, simd_add_f32_fast, simd_add_f32_optimized,
    simd_add_f32_ultra, simd_add_f64, simd_maximum_f32, simd_maximum_f64, simd_minimum_f32,
    simd_minimum_f64,
};

// From `basic_optimized`.
pub use basic_optimized::{
    simd_add_f32_ultra_optimized, simd_dot_f32_ultra_optimized, simd_mul_f32_ultra_optimized,
    simd_sum_f32_ultra_optimized,
};

// From `arithmetic`.
pub use arithmetic::{simd_scalar_mul_f32, simd_scalar_mul_f64};

// From `dot`. Note that the div/mul/sub/fma entry points are also exported
// from this module, not only the dot products.
pub use dot::{
    simd_div_f32, simd_div_f64, simd_dot_f32, simd_dot_f32_adaptive, simd_dot_f32_ultra,
    simd_dot_f64, simd_fma_f32_ultra, simd_mul_f32, simd_mul_f32_fast, simd_mul_f64, simd_sub_f32,
    simd_sub_f64,
};

// From `reductions`.
pub use reductions::{
    simd_max_f32, simd_max_f64, simd_mean_f32, simd_mean_f64, simd_min_f32, simd_min_f64,
    simd_std_f32, simd_std_f64, simd_sum_f32, simd_sum_f64, simd_variance_f32, simd_variance_f64,
};

// From `norms`.
pub use norms::{
    simd_norm_l1_f32, simd_norm_l1_f64, simd_norm_l2_f32, simd_norm_l2_f64, simd_norm_linf_f32,
    simd_norm_linf_f64,
};

// From `distances`.
pub use distances::{
    simd_distance_chebyshev_f32, simd_distance_chebyshev_f64, simd_distance_euclidean_f32,
    simd_distance_euclidean_f64, simd_distance_manhattan_f32, simd_distance_manhattan_f64,
    simd_distance_squared_euclidean_f32, simd_distance_squared_euclidean_f64,
};

// From `similarity`.
pub use similarity::{
    simd_cosine_similarity_f32, simd_cosine_similarity_f64, simd_distance_cosine_f32,
    simd_distance_cosine_f64,
};

// From `weighted`.
pub use weighted::{
    simd_weighted_mean_f32, simd_weighted_mean_f64, simd_weighted_sum_f32, simd_weighted_sum_f64,
};

// From `preprocessing`.
pub use preprocessing::{
    simd_normalize_f32, simd_normalize_f64, simd_standardize_f32, simd_standardize_f64,
};

// From `indexing`.
pub use indexing::{
    simd_argmax_f32, simd_argmax_f64, simd_argmin_f32, simd_argmin_f64, simd_clip_f32,
    simd_clip_f64,
};

// From `activation`.
pub use activation::{
    simd_leaky_relu_f32, simd_leaky_relu_f64, simd_log_sum_exp_f32, simd_log_sum_exp_f64,
    simd_relu_f32, simd_relu_f64, simd_softmax_f32, simd_softmax_f64,
};

// From `cumulative`.
pub use cumulative::{
    simd_cumprod_f32, simd_cumprod_f64, simd_cumsum_f32, simd_cumsum_f64, simd_diff_f32,
    simd_diff_f64,
};

// From `unary`.
pub use unary::{
    simd_abs_f32, simd_abs_f64, simd_sign_f32, simd_sign_f64, simd_sqrt_f32, simd_sqrt_f64,
};

// From `unary_powi`.
pub use unary_powi::{simd_powi_f32, simd_powi_f64};

// From `transpose`.
pub use transpose::{simd_transpose_blocked_f32, simd_transpose_blocked_f64};

// From `rounding`.
pub use rounding::{
    simd_ceil_f32, simd_ceil_f64, simd_floor_f32, simd_floor_f64, simd_round_f32, simd_round_f64,
    simd_trunc_f32, simd_trunc_f64,
};

// From `transcendental`. `simd_exp_fast_f32` is exported without an f64
// counterpart in this list.
pub use transcendental::{
    simd_cos_f32, simd_cos_f64, simd_exp_f32, simd_exp_f64, simd_exp_fast_f32, simd_gelu_f32,
    simd_gelu_f64, simd_ln_f32, simd_ln_f64, simd_log10_f32, simd_log10_f64, simd_log2_f32,
    simd_log2_f64, simd_mish_f32, simd_mish_f64, simd_sigmoid_f32, simd_sigmoid_f64, simd_sin_f32,
    simd_sin_f64, simd_softplus_f32, simd_softplus_f64, simd_swish_f32, simd_swish_f64,
    simd_tanh_f32, simd_tanh_f64,
};

// From `normalization`.
pub use normalization::{
    simd_batch_norm_f32, simd_batch_norm_f64, simd_layer_norm_f32, simd_layer_norm_f64,
};

// Everything public in the private `simd_impl` module is re-exported wholesale.
// Per Rust name resolution, the explicit `pub use` items above take precedence
// over any same-named item brought in by this glob.
pub use simd_impl::*;