pub mod batch;
pub mod memory;
pub mod parallel;
pub mod quantize;
pub mod simd;
use std::sync::OnceLock;
/// Snapshot of the SIMD instruction-set extensions found on the running CPU.
///
/// Each flag is an independent yes/no answer; a flag that belongs to a
/// different architecture than the one the crate was compiled for is `false`.
#[derive(Clone, Copy, Debug)]
pub struct CpuFeatures {
    /// AVX2 (x86_64).
    pub avx2: bool,
    /// AVX-512 Foundation (x86_64).
    pub avx512f: bool,
    /// NEON (aarch64).
    pub neon: bool,
    /// SSE4.2 (x86_64).
    pub sse4_2: bool,
}
/// Process-wide cache of the CPU feature probe; populated by the first call
/// to `detect_features` and read by every call after that.
static CPU_FEATURES: OnceLock<CpuFeatures> = OnceLock::new();
/// Reports which SIMD extensions the running CPU supports.
///
/// The hardware is probed at most once per process: the first call stores its
/// answer in `CPU_FEATURES`, and every later call returns a copy of it.
///
/// Only `x86_64` and `aarch64` builds perform a runtime probe. On any other
/// target architecture every flag is `false`.
pub fn detect_features() -> CpuFeatures {
    /// Baseline shared by all architectures: nothing detected.
    const NOT_DETECTED: CpuFeatures = CpuFeatures {
        avx2: false,
        avx512f: false,
        neon: false,
        sse4_2: false,
    };

    /// x86_64 probe; `neon` keeps its baseline value of `false`.
    #[cfg(target_arch = "x86_64")]
    fn probe_host() -> CpuFeatures {
        CpuFeatures {
            avx2: is_x86_feature_detected!("avx2"),
            avx512f: is_x86_feature_detected!("avx512f"),
            sse4_2: is_x86_feature_detected!("sse4.2"),
            ..NOT_DETECTED
        }
    }

    /// aarch64 probe; the x86 flags keep their baseline value of `false`.
    #[cfg(target_arch = "aarch64")]
    fn probe_host() -> CpuFeatures {
        CpuFeatures {
            neon: std::arch::is_aarch64_feature_detected!("neon"),
            ..NOT_DETECTED
        }
    }

    /// Fallback for architectures without a runtime probe.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    fn probe_host() -> CpuFeatures {
        NOT_DETECTED
    }

    *CPU_FEATURES.get_or_init(probe_host)
}
/// Returns the CPU features of the running machine.
///
/// This is an alias for [`detect_features`]: it forwards the call and returns
/// that function's result unchanged.
pub fn get_features() -> CpuFeatures {
detect_features()
}
/// An operation on `T` that provides both a SIMD and a scalar implementation.
pub trait OptimizedOp<T> {
    /// Runs the operation on `input`; the strategy is chosen by the implementor.
    fn execute(&self, input: T) -> T;

    /// Runs the operation, choosing the implementation from the detected CPU
    /// features.
    ///
    /// [`execute_simd`](Self::execute_simd) is used when AVX2, AVX-512F or NEON
    /// is available; otherwise [`execute_scalar`](Self::execute_scalar) is
    /// used. SSE4.2 on its own does not select the SIMD path.
    fn execute_auto(&self, input: T) -> T {
        let cpu = get_features();
        let scalar_only = !cpu.avx2 && !cpu.avx512f && !cpu.neon;
        if scalar_only {
            return self.execute_scalar(input);
        }
        self.execute_simd(input)
    }

    /// Runs the SIMD implementation of the operation.
    fn execute_simd(&self, input: T) -> T;

    /// Runs the scalar implementation of the operation.
    fn execute_scalar(&self, input: T) -> T;
}
/// How much of the crate's optimized machinery may be used.
///
/// The levels are cumulative as interpreted by `simd_enabled`,
/// `parallel_enabled` and `memory_opt_enabled`: each level enables everything
/// the previous one does, plus one more category.
///
/// The default is [`OptLevel::Full`]. `Default` is derived via `#[default]`
/// rather than written by hand.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum OptLevel {
    /// No optimized paths are enabled.
    None,
    /// SIMD paths only.
    Simd,
    /// SIMD and parallel paths.
    Parallel,
    /// SIMD, parallel and memory optimizations (the default).
    #[default]
    Full,
}
/// Process-wide optimization level. It is written at most once: either by
/// `set_opt_level`, or by `get_opt_level` storing the default when nothing
/// was set before the first read.
static OPT_LEVEL: OnceLock<OptLevel> = OnceLock::new();
/// Sets the process-wide optimization level.
///
/// Only the first write takes effect. If a level has already been stored,
/// whether by an earlier call or by `get_opt_level` having stored the default,
/// this call silently does nothing.
pub fn set_opt_level(level: OptLevel) {
    // `set` returns `Err` when a value is already stored; first-write-wins is
    // the intended behavior, so the outcome is deliberately discarded.
    let _ = OPT_LEVEL.set(level);
}
/// Returns the process-wide optimization level.
///
/// If no level has been stored yet, the default level is stored first, so a
/// later `set_opt_level` call has no effect.
pub fn get_opt_level() -> OptLevel {
    let stored: &OptLevel = OPT_LEVEL.get_or_init(OptLevel::default);
    *stored
}
/// Tells whether the current optimization level allows SIMD code paths.
///
/// Every level except [`OptLevel::None`] does.
pub fn simd_enabled() -> bool {
    match get_opt_level() {
        OptLevel::None => false,
        OptLevel::Simd | OptLevel::Parallel | OptLevel::Full => true,
    }
}
/// Tells whether the current optimization level allows parallel execution.
///
/// True for [`OptLevel::Parallel`] and [`OptLevel::Full`] only.
pub fn parallel_enabled() -> bool {
    match get_opt_level() {
        OptLevel::Parallel | OptLevel::Full => true,
        OptLevel::None | OptLevel::Simd => false,
    }
}
/// Tells whether the current optimization level allows memory optimizations.
///
/// True only at [`OptLevel::Full`].
pub fn memory_opt_enabled() -> bool {
    get_opt_level() == OptLevel::Full
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Detection must give the same answer on every call and must never
    /// report an extension that belongs to a different architecture.
    ///
    /// The previous assertion had the form `A || B || C || D || none-of-them`,
    /// which is true for every input and therefore could never fail.
    #[test]
    fn test_feature_detection() {
        let features = detect_features();
        println!("Detected features: {:?}", features);

        // The result is cached, so a second call has to agree field by field.
        let again = detect_features();
        assert_eq!(features.avx2, again.avx2);
        assert_eq!(features.avx512f, again.avx512f);
        assert_eq!(features.neon, again.neon);
        assert_eq!(features.sse4_2, again.sse4_2);

        // NEON is only probed on aarch64.
        if cfg!(not(target_arch = "aarch64")) {
            assert!(!features.neon, "NEON reported on a non-aarch64 target");
        }

        // AVX2 / AVX-512F / SSE4.2 are only probed on x86_64.
        if cfg!(not(target_arch = "x86_64")) {
            assert!(
                !features.avx2 && !features.avx512f && !features.sse4_2,
                "x86 extension reported on a non-x86_64 target"
            );
        }
    }

    /// Reading the level stores the default, so a later `set_opt_level` call
    /// must not change it.
    #[test]
    fn test_opt_level() {
        assert_eq!(get_opt_level(), OptLevel::Full);
        set_opt_level(OptLevel::Simd);
        assert_eq!(get_opt_level(), OptLevel::Full);
    }

    /// At the default level (`Full`) every optimization category is enabled.
    #[test]
    fn test_optimization_checks() {
        assert!(simd_enabled());
        assert!(parallel_enabled());
        assert!(memory_opt_enabled());
    }
}