use core::mem::MaybeUninit;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{
vcombine_s16, vcvtaq_s32_f32, vdupq_n_f32, vld1q_f32, vmaxq_f32, vminq_f32, vmulq_n_f32,
vqmovn_s32, vst1q_s16,
};
/// Scale factor (2^15) applied to a sample clamped to `[-1.0, 1.0]` before it
/// is rounded and converted to `i16` by the quantizers in this file.
const I16_MUL: f32 = 32_768.0;
/// Portable quantizer: writes one `i16` into `out` for every `f32` in `src`.
///
/// Each sample is clamped to `[-1.0, 1.0]`, multiplied by `I16_MUL`, rounded
/// with `f32::round` (ties away from zero) and converted with an `as i16`
/// cast. The cast saturates, so an input of `1.0` (which scales to `32768.0`)
/// is stored as `32767`.
///
/// # Panics
///
/// Panics if `out.len() != src.len()`.
#[inline]
#[doc(hidden)]
pub fn f32_to_i16_quantize_scalar(out: &mut [MaybeUninit<i16>], src: &[f32]) {
    assert_eq!(
        out.len(),
        src.len(),
        "f32_to_i16_quantize_scalar: out.len() ({}) must equal src.len() ({}) (one i16 per input f32)",
        out.len(),
        src.len(),
    );

    out.iter_mut().zip(src).for_each(|(slot, &sample)| {
        let scaled = sample.clamp(-1.0, 1.0) * I16_MUL;
        // The saturating `as` cast maps the single out-of-range value
        // (+32768.0) to `i16::MAX`.
        slot.write(scaled.round() as i16);
    });
}
/// NEON implementation of the `f32` to `i16` quantizer.
///
/// Full tiles of eight samples are clamped to `[-1.0, 1.0]`, multiplied by
/// `I16_MUL`, converted to `i32` with `vcvtaq_s32_f32` (round to nearest, ties
/// away from zero) and narrowed to `i16` with the saturating `vqmovn_s32`.
/// The trailing `src.len() % 8` samples, if any, are handed to
/// `f32_to_i16_quantize_scalar`.
///
/// # Safety
///
/// This function is compiled with `#[target_feature(enable = "neon")]`; the
/// caller must ensure the running CPU supports NEON.
///
/// # Panics
///
/// Panics if `out.len() != src.len()`.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn f32_to_i16_quantize_neon(out: &mut [MaybeUninit<i16>], src: &[f32]) {
    assert_eq!(
        out.len(),
        src.len(),
        "f32_to_i16_quantize_neon: out.len() ({}) must equal src.len() ({}) (one i16 per input f32)",
        out.len(),
        src.len(),
    );

    // Both slices have the same length, so they split into the same number of
    // eight-element tiles and leave remainders of equal size.
    let mut src_tiles = src.chunks_exact(8);
    let mut out_tiles = out.chunks_exact_mut(8);

    // SAFETY: every tile produced by `chunks_exact(8)` / `chunks_exact_mut(8)`
    // is exactly eight elements long, so the two 4-lane loads at offsets 0 and
    // 4 and the single 8-lane store stay inside their tile. `MaybeUninit<i16>`
    // has the same layout as `i16`, so writing through the cast pointer is
    // valid. NEON availability is the caller's obligation (see `# Safety`).
    unsafe {
        let floor = vdupq_n_f32(-1.0);
        let ceiling = vdupq_n_f32(1.0);

        for (dst, samples) in (&mut out_tiles).zip(&mut src_tiles) {
            let rd = samples.as_ptr();
            let wr = dst.as_mut_ptr().cast::<i16>();

            // Clamp to [-1.0, 1.0]: max against the floor first, then min
            // against the ceiling.
            let front = vminq_f32(vmaxq_f32(vld1q_f32(rd), floor), ceiling);
            let back = vminq_f32(vmaxq_f32(vld1q_f32(rd.add(4)), floor), ceiling);

            // Scale, then convert to i32 rounding to nearest with ties away
            // from zero.
            let front = vcvtaq_s32_f32(vmulq_n_f32(front, I16_MUL));
            let back = vcvtaq_s32_f32(vmulq_n_f32(back, I16_MUL));

            // Saturating narrow to i16 and store all eight lanes at once.
            vst1q_s16(wr, vcombine_s16(vqmovn_s32(front), vqmovn_s32(back)));
        }
    }

    let tail = src_tiles.remainder();
    if !tail.is_empty() {
        f32_to_i16_quantize_scalar(out_tiles.into_remainder(), tail);
    }
}
/// Quantizes `src` into `out` (one `i16` per `f32`), choosing the kernel at
/// run time.
///
/// On `aarch64` the NEON kernel is used when `crate::simd::is_neon_available()`
/// returns `true`; in every other case the work goes to
/// `f32_to_i16_quantize_scalar`.
///
/// # Panics
///
/// Panics if `out.len() != src.len()`. The check is made here, before
/// dispatch, so the panic message names this function.
#[inline]
#[doc(hidden)]
pub fn f32_to_i16_quantize(out: &mut [MaybeUninit<i16>], src: &[f32]) {
    assert_eq!(
        out.len(),
        src.len(),
        "simd::audio::f32_to_i16_quantize: out.len() ({}) must equal src.len() ({})",
        out.len(),
        src.len(),
    );

    #[cfg(target_arch = "aarch64")]
    {
        let neon_ok = crate::simd::is_neon_available();
        if neon_ok {
            // SAFETY: NEON support was just confirmed at run time, which is
            // the only requirement the `neon` target feature places on the
            // caller.
            return unsafe { f32_to_i16_quantize_neon(out, src) };
        }
    }

    f32_to_i16_quantize_scalar(out, src)
}
#[cfg(test)]
mod tests {
    use core::mem::MaybeUninit;

    use super::{f32_to_i16_quantize, f32_to_i16_quantize_scalar};
    use crate::simd::diff::{assert_eq_over_lane_sweep, lane_sweep_lengths};

    /// Allocates a `Vec<i16>` with capacity for `src.len()` elements, lets
    /// `kernel` fill the first `src.len()` spare slots from `src`, and returns
    /// the vector with its length set to `src.len()`.
    fn quantize_with(
        src: &[f32],
        kernel: impl FnOnce(&mut [MaybeUninit<i16>], &[f32]),
    ) -> Vec<i16> {
        let len = src.len();
        let mut buf: Vec<i16> = Vec::with_capacity(len);
        kernel(&mut buf.spare_capacity_mut()[..len], src);
        // SAFETY: the capacity is at least `len`, and the kernels used in this
        // module write every one of the `len` slots they are handed.
        unsafe { buf.set_len(len) };
        buf
    }

    /// Quantizes `src` with the scalar kernel.
    fn quantize_scalar_init(src: &[f32]) -> Vec<i16> {
        quantize_with(src, f32_to_i16_quantize_scalar)
    }

    /// Quantizes `src` through the runtime dispatcher.
    fn quantize_dispatch_init(src: &[f32]) -> Vec<i16> {
        quantize_with(src, f32_to_i16_quantize)
    }

    /// Quantizes `src` with the NEON kernel; callers check NEON availability
    /// before using this helper.
    #[cfg(target_arch = "aarch64")]
    fn quantize_neon_init(src: &[f32]) -> Vec<i16> {
        quantize_with(src, |dst, samples| {
            // SAFETY: the tests that use this helper return early when NEON is
            // unavailable.
            unsafe { super::f32_to_i16_quantize_neon(dst, samples) }
        })
    }

    /// Deterministic ramp of `n` samples wrapped into `[-1.5, 1.5)`, so part of
    /// the input lies outside the `[-1.0, 1.0]` clamp range.
    fn gen_samples(n: usize) -> Vec<f32> {
        const STEP: f32 = 0.137;
        let mut samples = Vec::with_capacity(n);
        for i in 0..n {
            let ramp = -1.5 + (i as f32) * STEP;
            samples.push((ramp + 1.5).rem_euclid(3.0) - 1.5);
        }
        samples
    }

    /// Scalar and dispatched results must agree over the lane sweep for 8 lanes.
    #[test]
    fn quantize_scalar_matches_dispatcher_exact() {
        assert_eq_over_lane_sweep(8, quantize_scalar_init, quantize_dispatch_init, gen_samples);
    }

    /// The NEON kernel must reproduce the scalar kernel exactly at lengths
    /// around multiples of the eight-sample tile.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn quantize_neon_matches_scalar_bit_identical() {
        if !crate::simd::is_neon_available() {
            return;
        }
        for n in [0usize, 1, 7, 8, 9, 15, 16, 17, 23, 24, 25, 64, 1024] {
            let src = gen_samples(n);
            let neon = quantize_neon_init(&src);
            let scalar = quantize_scalar_init(&src);
            assert_eq!(
                neon, scalar,
                "quantize_neon vs quantize_scalar differ at n={n}"
            );
        }
    }

    /// Pins the lengths the 8-lane sweep is expected to produce.
    #[test]
    fn quantize_lane_sweep_covers_tile_boundaries() {
        assert_eq!(lane_sweep_lengths(8), [0, 1, 7, 8, 9, 15, 16, 24, 25]);
    }

    /// Empty input yields empty output on both paths.
    #[test]
    fn quantize_empty_is_noop() {
        assert!(quantize_dispatch_init(&[]).is_empty());
        assert!(quantize_scalar_init(&[]).is_empty());
    }

    /// Spot-checks zero, the clamp limits, out-of-range inputs and half scale.
    #[test]
    fn quantize_specific_values() {
        let cases: [(f32, i16); 7] = [
            (0.0, 0),
            (1.0, 32767),
            (-1.0, -32768),
            (2.0, 32767),
            (-2.0, -32768),
            (0.5, 16384),
            (-0.5, -16384),
        ];
        let (src, expected): (Vec<f32>, Vec<i16>) = cases.into_iter().unzip();

        assert_eq!(quantize_dispatch_init(&src), expected);
        assert_eq!(quantize_scalar_init(&src), expected);
    }

    /// Compares the dispatcher against an inline clamp/scale/round/cast
    /// reference over 65,536 samples of alternating sign.
    #[test]
    fn quantize_matches_reference_loop() {
        let n = 65_536_usize;
        let src: Vec<f32> = (0..n)
            .map(|i| {
                let mag = 0.001 * (i % 2048) as f32;
                if i.is_multiple_of(2) { mag } else { -mag }
            })
            .collect();

        let reference: Vec<i16> = src
            .iter()
            .map(|s| (s.clamp(-1.0, 1.0) * 32_768.0).round() as i16)
            .collect();

        assert_eq!(quantize_dispatch_init(&src), reference);
    }

    /// Every `i16` value `k`, expressed as `k / 32768`, must quantize back to
    /// `k`, and dividing the result by 32768 must reproduce the same `f32` bits.
    #[test]
    fn quantize_read_write_round_trip_is_symmetric() {
        const I16_DIV: f32 = 32_768.0;
        for k in i16::MIN..=i16::MAX {
            let f = f32::from(k) / I16_DIV;
            let q = quantize_dispatch_init(&[f])[0];
            assert_eq!(
                q, k,
                "f={f} (k={k}) must quantize to exactly {k} (got {q})"
            );
            let reconstructed = f32::from(q) / I16_DIV;
            assert_eq!(
                reconstructed.to_bits(),
                f.to_bits(),
                "round-trip drift at k={k}: f={f}, reconstructed={reconstructed}"
            );
        }
    }

    /// The scalar kernel rejects mismatched lengths with its own message.
    #[test]
    #[should_panic(expected = "f32_to_i16_quantize_scalar: out.len() (5) must equal src.len() (7)")]
    fn quantize_scalar_panics_on_size_mismatch_in_release() {
        let mut short = [MaybeUninit::<i16>::uninit(); 5];
        f32_to_i16_quantize_scalar(&mut short, &[0.0_f32; 7]);
    }

    /// The dispatcher rejects mismatched lengths with its own message.
    #[test]
    #[should_panic(
        expected = "simd::audio::f32_to_i16_quantize: out.len() (5) must equal src.len() (7)"
    )]
    fn quantize_dispatch_panics_on_size_mismatch_in_release() {
        let mut short = [MaybeUninit::<i16>::uninit(); 5];
        f32_to_i16_quantize(&mut short, &[0.0_f32; 7]);
    }

    /// The NEON kernel rejects mismatched lengths with its own message. When
    /// NEON is unavailable the test panics with a matching message so that
    /// `should_panic` is still satisfied.
    #[cfg(target_arch = "aarch64")]
    #[test]
    #[should_panic(expected = "f32_to_i16_quantize_neon: out.len() (5) must equal src.len() (7)")]
    fn quantize_neon_panics_on_size_mismatch_in_release() {
        if !crate::simd::is_neon_available() {
            panic!(
                "f32_to_i16_quantize_neon: out.len() (5) must equal src.len() (7) (skipped — NEON unavailable)"
            );
        }
        let mut short = [MaybeUninit::<i16>::uninit(); 5];
        // SAFETY: NEON availability was checked above.
        unsafe { super::f32_to_i16_quantize_neon(&mut short, &[0.0_f32; 7]) };
    }
}