use std::sync::OnceLock;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SimdSupport {
pub sse2: bool,
pub avx2: bool,
pub neon: bool,
}
static SIMD_SUPPORT: OnceLock<SimdSupport> = OnceLock::new();
pub fn init_simd_support() {
SIMD_SUPPORT.get_or_init(|| detect_simd_support());
}
fn detect_simd_support() -> SimdSupport {
#[cfg(target_arch = "x86_64")]
{
SimdSupport {
sse2: is_x86_feature_detected!("sse2"),
avx2: is_x86_feature_detected!("avx2"),
neon: false,
}
}
#[cfg(target_arch = "aarch64")]
{
SimdSupport {
sse2: false,
avx2: false,
neon: std::arch::is_aarch64_feature_detected!("neon"),
}
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
SimdSupport {
sse2: false,
avx2: false,
neon: false,
}
}
}
pub fn get_simd_support() -> SimdSupport {
*SIMD_SUPPORT.get_or_init(|| detect_simd_support())
}
pub fn has_simd_support() -> bool {
let support = get_simd_support();
support.sse2 || support.avx2 || support.neon
}
#[cfg(target_arch = "x86_64")]
pub fn encode_mulaw_simd_sse2(samples: &[i16], output: &mut [u8]) {
use std::arch::x86_64::*;
if !get_simd_support().sse2 {
return encode_mulaw_scalar(samples, output);
}
let mut chunks = samples.chunks_exact(8);
let mut out_idx = 0;
unsafe {
for chunk in chunks.by_ref() {
let samples_vec = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
output[out_idx] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 0) as i16);
output[out_idx + 1] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 1) as i16);
output[out_idx + 2] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 2) as i16);
output[out_idx + 3] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 3) as i16);
output[out_idx + 4] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 4) as i16);
output[out_idx + 5] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 5) as i16);
output[out_idx + 6] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 6) as i16);
output[out_idx + 7] = linear_to_mulaw_scalar(_mm_extract_epi16(samples_vec, 7) as i16);
out_idx += 8;
}
}
for &sample in chunks.remainder() {
output[out_idx] = linear_to_mulaw_scalar(sample);
out_idx += 1;
}
}
#[cfg(target_arch = "aarch64")]
pub fn encode_mulaw_simd_neon(samples: &[i16], output: &mut [u8]) {
if !get_simd_support().neon {
return encode_mulaw_scalar(samples, output);
}
encode_mulaw_scalar(samples, output);
}
pub fn encode_mulaw_scalar(samples: &[i16], output: &mut [u8]) {
for (i, &sample) in samples.iter().enumerate() {
output[i] = linear_to_mulaw_scalar(sample);
}
}
#[cfg(target_arch = "x86_64")]
pub fn encode_alaw_simd_sse2(samples: &[i16], output: &mut [u8]) {
use std::arch::x86_64::*;
if !get_simd_support().sse2 {
return encode_alaw_scalar(samples, output);
}
let mut chunks = samples.chunks_exact(8);
let mut out_idx = 0;
unsafe {
for chunk in chunks.by_ref() {
let samples_vec = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
output[out_idx] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 0) as i16);
output[out_idx + 1] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 1) as i16);
output[out_idx + 2] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 2) as i16);
output[out_idx + 3] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 3) as i16);
output[out_idx + 4] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 4) as i16);
output[out_idx + 5] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 5) as i16);
output[out_idx + 6] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 6) as i16);
output[out_idx + 7] = linear_to_alaw_scalar(_mm_extract_epi16(samples_vec, 7) as i16);
out_idx += 8;
}
}
for &sample in chunks.remainder() {
output[out_idx] = linear_to_alaw_scalar(sample);
out_idx += 1;
}
}
#[cfg(target_arch = "aarch64")]
pub fn encode_alaw_simd_neon(samples: &[i16], output: &mut [u8]) {
if !get_simd_support().neon {
return encode_alaw_scalar(samples, output);
}
encode_alaw_scalar(samples, output);
}
pub fn encode_alaw_scalar(samples: &[i16], output: &mut [u8]) {
for (i, &sample) in samples.iter().enumerate() {
output[i] = linear_to_alaw_scalar(sample);
}
}
pub fn encode_mulaw_optimized(samples: &[i16], output: &mut [u8]) {
#[cfg(target_arch = "x86_64")]
{
if get_simd_support().sse2 {
return encode_mulaw_simd_sse2(samples, output);
}
}
#[cfg(target_arch = "aarch64")]
{
if get_simd_support().neon {
return encode_mulaw_simd_neon(samples, output);
}
}
encode_mulaw_scalar(samples, output);
}
pub fn encode_alaw_optimized(samples: &[i16], output: &mut [u8]) {
#[cfg(target_arch = "x86_64")]
{
if get_simd_support().sse2 {
return encode_alaw_simd_sse2(samples, output);
}
}
#[cfg(target_arch = "aarch64")]
{
if get_simd_support().neon {
return encode_alaw_simd_neon(samples, output);
}
}
encode_alaw_scalar(samples, output);
}
pub fn linear_to_mulaw_scalar(sample: i16) -> u8 {
const CLIP: i16 = 32635;
const BIAS: i16 = 0x84;
const MULAW_MAX: u8 = 0x7F;
let mut sample = sample;
let sign = if sample < 0 {
sample = if sample == i16::MIN {
i16::MAX
} else {
-sample
};
0x80
} else {
0x00
};
if sample > CLIP {
sample = CLIP;
}
sample = sample + BIAS;
let exponent = if sample <= 0x1F {
0
} else if sample <= 0x3F {
1
} else if sample <= 0x7F {
2
} else if sample <= 0xFF {
3
} else if sample <= 0x1FF {
4
} else if sample <= 0x3FF {
5
} else if sample <= 0x7FF {
6
} else {
7
};
let mantissa = (sample >> (exponent + 3)) & 0x0F;
let mulaw = ((exponent << 4) | mantissa) as u8;
(mulaw ^ MULAW_MAX) | sign
}
pub fn linear_to_alaw_scalar(sample: i16) -> u8 {
const CLIP: i16 = 32635;
const ALAW_MAX: u8 = 0x7F;
let mut sample = sample;
let sign = if sample < 0 {
sample = if sample == i16::MIN {
i16::MAX
} else {
-sample
};
0x80
} else {
0x00
};
if sample > CLIP {
sample = CLIP;
}
let alaw = if sample < 256 {
sample >> 4
} else {
let exponent = if sample < 512 {
1
} else if sample < 1024 {
2
} else if sample < 2048 {
3
} else if sample < 4096 {
4
} else if sample < 8192 {
5
} else if sample < 16384 {
6
} else {
7
};
let mantissa = (sample >> (exponent + 3)) & 0x0F;
((exponent << 4) | mantissa) + 16
};
((alaw as u8) ^ ALAW_MAX) | sign
}
pub fn mulaw_to_linear_scalar(mulaw: u8) -> i16 {
const BIAS: i16 = 0x84;
const MULAW_MAX: u8 = 0x7F;
let mulaw = mulaw ^ MULAW_MAX;
let sign = mulaw & 0x80;
let exponent = (mulaw >> 4) & 0x07;
let mantissa = mulaw & 0x0F;
let mut sample = ((mantissa as i16) << (exponent + 3)) + BIAS;
if exponent > 0 {
sample += 1i16 << (exponent + 2);
}
if sign != 0 {
-sample
} else {
sample
}
}
pub fn alaw_to_linear_scalar(alaw: u8) -> i16 {
const ALAW_MAX: u8 = 0x7F;
let alaw = alaw ^ ALAW_MAX;
let sign = alaw & 0x80;
let magnitude = alaw & 0x7F;
let sample = if magnitude < 16 {
(magnitude as u16) << 4
} else {
let exponent = (magnitude >> 4) & 0x07;
let mantissa = magnitude & 0x0F;
let exp_shift = ((exponent + 3) as u32).min(15);
let gain_shift = ((exponent + 2) as u32).min(15);
((mantissa as u16) << exp_shift) + ((1u16) << gain_shift)
} + 8;
if sign != 0 {
-(sample as i16)
} else {
sample as i16
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simd_support_detection() {
init_simd_support();
let support = get_simd_support();
#[cfg(target_arch = "x86_64")]
{
println!("SSE2 support: {}", support.sse2);
}
#[cfg(target_arch = "aarch64")]
{
println!("NEON support: {}", support.neon);
}
}
#[test]
fn test_mulaw_roundtrip() {
let original = 12345i16;
let encoded = linear_to_mulaw_scalar(original);
let decoded = mulaw_to_linear_scalar(encoded);
let error = (original - decoded).abs();
assert!(error < 1000, "Error too large: {}", error);
}
#[test]
fn test_alaw_roundtrip() {
let original = 12345i16;
let encoded = linear_to_alaw_scalar(original);
let decoded = alaw_to_linear_scalar(encoded);
let error = (original - decoded).abs();
assert!(
error < 5000,
"Error too large: {} (original: {}, decoded: {})",
error,
original,
decoded
);
}
#[test]
fn test_simd_vs_scalar() {
let samples = vec![0, 1000, -1000, 16000, -16000, 32000, -32000, 12345];
let mut simd_output = vec![0u8; samples.len()];
let mut scalar_output = vec![0u8; samples.len()];
encode_mulaw_optimized(&samples, &mut simd_output);
encode_mulaw_scalar(&samples, &mut scalar_output);
assert_eq!(simd_output, scalar_output);
}
#[test]
fn test_empty_input() {
let samples: Vec<i16> = vec![];
let mut output: Vec<u8> = vec![];
encode_mulaw_optimized(&samples, &mut output);
assert_eq!(output.len(), 0);
}
#[test]
fn test_edge_cases() {
let samples = vec![i16::MAX, i16::MIN, 0];
let mut output = vec![0u8; samples.len()];
encode_mulaw_optimized(&samples, &mut output);
assert_eq!(output.len(), samples.len());
}
}