#![allow(dead_code)]
/// A SIMD instruction-set extension that [`CpuCapabilities`] can report.
///
/// The x86 variants correspond to the `"sse4.1"`, `"avx"` and `"avx2"`
/// runtime-detection names (plus SSE2); `Neon` is the one reported on
/// `aarch64`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CpuFeature {
    /// x86 SSE2.
    Sse2,
    /// x86 SSE4.1.
    Sse4_1,
    /// x86 AVX.
    Avx,
    /// x86 AVX2.
    Avx2,
    /// ARM NEON.
    Neon,
}
/// The set of [`CpuFeature`]s known to be available.
///
/// The derived [`Default`] value holds no features at all; use
/// `CpuCapabilities::detect` to query the running CPU.
#[derive(Clone, Debug, Default)]
pub struct CpuCapabilities {
    /// Features in the order they were recorded.
    features: Vec<CpuFeature>,
}
impl CpuCapabilities {
    /// Probes the CPU this program is running on.
    ///
    /// * `x86_64`: SSE2 is always reported (it is part of the x86_64
    ///   baseline); SSE4.1, AVX and AVX2 are checked at runtime with
    ///   `is_x86_feature_detected!`.
    /// * 32-bit `x86`: all four features, including SSE2, are checked at
    ///   runtime.
    /// * `aarch64`: NEON is always reported.
    /// * Any other architecture: the returned set is empty.
    #[must_use]
    pub fn detect() -> Self {
        // One `let` per architecture family, selected at compile time. This
        // avoids a `mut` binding that would be flagged as unused on targets
        // where no feature is ever pushed.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        let features = {
            let mut found = Vec::with_capacity(4);
            // `cfg!` short-circuits on x86_64, so SSE2 stays unconditional
            // there; only 32-bit x86 actually asks the CPU.
            if cfg!(target_arch = "x86_64") || is_x86_feature_detected!("sse2") {
                found.push(CpuFeature::Sse2);
            }
            if is_x86_feature_detected!("sse4.1") {
                found.push(CpuFeature::Sse4_1);
            }
            if is_x86_feature_detected!("avx") {
                found.push(CpuFeature::Avx);
            }
            if is_x86_feature_detected!("avx2") {
                found.push(CpuFeature::Avx2);
            }
            found
        };

        #[cfg(target_arch = "aarch64")]
        let features = vec![CpuFeature::Neon];

        #[cfg(not(any(
            target_arch = "x86",
            target_arch = "x86_64",
            target_arch = "aarch64"
        )))]
        let features = Vec::new();

        Self { features }
    }

    /// Returns `true` if `feature` is part of this capability set.
    #[must_use]
    pub fn has(&self, feature: CpuFeature) -> bool {
        self.features.iter().any(|&known| known == feature)
    }

    /// Returns the widest SIMD width this capability set supports.
    ///
    /// * `8` when AVX or AVX2 is present,
    /// * `4` when SSE2 or NEON is present,
    /// * `1` otherwise.
    ///
    /// NOTE(review): the values look like `f32` lanes per vector register
    /// (256-bit / 128-bit); confirm against callers.
    #[must_use]
    pub fn best_simd_width(&self) -> usize {
        let has_any = |wanted: &[CpuFeature]| wanted.iter().any(|&f| self.has(f));

        if has_any(&[CpuFeature::Avx2, CpuFeature::Avx]) {
            8
        } else if has_any(&[CpuFeature::Sse2, CpuFeature::Neon]) {
            4
        } else {
            1
        }
    }

    /// Returns the recorded features as a slice, in insertion order.
    #[must_use]
    pub fn features(&self) -> &[CpuFeature] {
        &self.features
    }
}
/// The implementation tier an `AccelDispatcher` is configured to use.
///
/// The derived `Ord` follows declaration order, i.e.
/// `Scalar < Sse2 < Avx2 < Neon`. Do not reorder the variants without
/// checking every comparison that relies on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DispatchLevel {
    /// No SIMD extension selected.
    Scalar,
    /// x86 SSE2 tier.
    Sse2,
    /// x86 AVX2 tier.
    Avx2,
    /// ARM NEON tier.
    Neon,
}
#[must_use]
pub fn select_dispatch_level(caps: &CpuCapabilities) -> DispatchLevel {
if caps.has(CpuFeature::Avx2) {
DispatchLevel::Avx2
} else if caps.has(CpuFeature::Neon) {
DispatchLevel::Neon
} else if caps.has(CpuFeature::Sse2) {
DispatchLevel::Sse2
} else {
DispatchLevel::Scalar
}
}
/// Bundles a chosen [`DispatchLevel`] with the [`CpuCapabilities`] it was
/// created alongside.
///
/// `Debug` and `Clone` are derived so the dispatcher can be logged and
/// copied like the types it is built from.
#[derive(Debug, Clone)]
pub struct AccelDispatcher {
    /// Level the dispatcher is configured to use.
    level: DispatchLevel,
    /// Capability set stored with the dispatcher.
    capabilities: CpuCapabilities,
}
impl AccelDispatcher {
    /// Builds a dispatcher for the running CPU: capabilities come from
    /// `CpuCapabilities::detect` and the level from `select_dispatch_level`.
    #[must_use]
    pub fn new() -> Self {
        let detected = CpuCapabilities::detect();
        Self {
            level: select_dispatch_level(&detected),
            capabilities: detected,
        }
    }

    /// Builds a dispatcher pinned to `level`.
    ///
    /// No detection is performed: the stored capabilities are the default
    /// (empty) set, so `capabilities()` does not reflect `level`.
    #[must_use]
    pub fn with_level(level: DispatchLevel) -> Self {
        let capabilities = CpuCapabilities::default();
        Self {
            level,
            capabilities,
        }
    }

    /// Returns the configured dispatch level.
    #[must_use]
    pub fn level(&self) -> DispatchLevel {
        self.level
    }

    /// Returns the capability set stored in this dispatcher.
    #[must_use]
    pub fn capabilities(&self) -> &CpuCapabilities {
        &self.capabilities
    }

    /// Computes the dot product of `a` and `b`.
    ///
    /// Elements are paired positionally; if the slices differ in length the
    /// surplus elements of the longer one are ignored. The configured level
    /// is read but does not select a different code path.
    #[must_use]
    pub fn dot_product(&self, a: &[f32], b: &[f32]) -> f32 {
        // The level is read and discarded; every level runs the code below.
        let _ = self.level;
        a.iter().zip(b).map(|(&lhs, &rhs)| lhs * rhs).sum()
    }

    /// Multiplies every element of `samples` by `gain`, in place.
    ///
    /// The configured level is read but does not select a different code
    /// path.
    pub fn apply_gain(&self, samples: &mut [f32], gain: f32) {
        // The level is read and discarded; every level runs the code below.
        let _ = self.level;
        samples.iter_mut().for_each(|sample| *sample *= gain);
    }

    /// Returns the chunk size associated with the configured level:
    /// 64 for scalar, 256 for SSE2 and NEON, 512 for AVX2.
    ///
    /// NOTE(review): the unit (presumably elements per chunk) is not visible
    /// here; confirm against callers.
    #[must_use]
    pub fn chunk_size(&self) -> usize {
        match self.level {
            DispatchLevel::Scalar => 64,
            DispatchLevel::Sse2 | DispatchLevel::Neon => 256,
            DispatchLevel::Avx2 => 512,
        }
    }
}
impl Default for AccelDispatcher {
    /// Same as [`AccelDispatcher::new`]: the default dispatcher is built
    /// from detected capabilities, not from an empty set.
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a capability set containing exactly `features`.
    fn caps_with(features: &[CpuFeature]) -> CpuCapabilities {
        CpuCapabilities {
            features: features.to_vec(),
        }
    }

    /// Shorthand for a dispatcher pinned to the scalar level.
    fn scalar_dispatcher() -> AccelDispatcher {
        AccelDispatcher::with_level(DispatchLevel::Scalar)
    }

    // --- CpuCapabilities -------------------------------------------------

    #[test]
    fn test_detect_does_not_panic() {
        let _ = CpuCapabilities::detect().best_simd_width();
    }

    #[test]
    fn test_has_feature_false_by_default() {
        let empty = CpuCapabilities::default();
        for &absent in &[CpuFeature::Avx2, CpuFeature::Neon] {
            assert!(!empty.has(absent));
        }
    }

    #[test]
    fn test_best_simd_width_scalar() {
        assert_eq!(CpuCapabilities::default().best_simd_width(), 1);
    }

    #[test]
    fn test_best_simd_width_avx2() {
        assert_eq!(caps_with(&[CpuFeature::Avx2]).best_simd_width(), 8);
    }

    #[test]
    fn test_best_simd_width_neon() {
        assert_eq!(caps_with(&[CpuFeature::Neon]).best_simd_width(), 4);
    }

    // --- select_dispatch_level -------------------------------------------

    #[test]
    fn test_select_dispatch_level_scalar() {
        let empty = CpuCapabilities::default();
        assert_eq!(select_dispatch_level(&empty), DispatchLevel::Scalar);
    }

    #[test]
    fn test_select_dispatch_level_avx2() {
        let caps = caps_with(&[CpuFeature::Avx2]);
        assert_eq!(select_dispatch_level(&caps), DispatchLevel::Avx2);
    }

    #[test]
    fn test_select_dispatch_level_neon() {
        let caps = caps_with(&[CpuFeature::Neon]);
        assert_eq!(select_dispatch_level(&caps), DispatchLevel::Neon);
    }

    // --- AccelDispatcher -------------------------------------------------

    #[test]
    fn test_dispatcher_dot_product() {
        let lhs = [1.0_f32, 2.0, 3.0];
        let rhs = [4.0_f32, 5.0, 6.0];
        // 1*4 + 2*5 + 3*6 = 32
        let dot = scalar_dispatcher().dot_product(&lhs, &rhs);
        assert!((dot - 32.0).abs() < 1e-4);
    }

    #[test]
    fn test_dispatcher_apply_gain() {
        let mut samples = vec![1.0_f32, 2.0, 3.0];
        scalar_dispatcher().apply_gain(&mut samples, 2.0);
        assert_eq!(samples, vec![2.0, 4.0, 6.0]);
    }

    #[test]
    fn test_chunk_size_ordering() {
        let chunk_for = |level| AccelDispatcher::with_level(level).chunk_size();
        let scalar = chunk_for(DispatchLevel::Scalar);
        let sse2 = chunk_for(DispatchLevel::Sse2);
        let avx2 = chunk_for(DispatchLevel::Avx2);
        assert!(scalar <= sse2);
        assert!(sse2 <= avx2);
    }

    #[test]
    fn test_dispatcher_new_does_not_panic() {
        let _ = AccelDispatcher::new().level();
    }

    #[test]
    fn test_cpu_feature_list_not_empty_x86_64() {
        // Only meaningful on x86_64; on every other target this test has
        // nothing to check and passes trivially.
        if cfg!(target_arch = "x86_64") {
            assert!(CpuCapabilities::detect().has(CpuFeature::Sse2));
        }
    }
}