use std::num::NonZeroUsize;
use crate::operations::traits::AudioStatistics;
use crate::repr::AudioData;
use crate::traits::StandardSample;
use crate::{AudioSampleResult, AudioSamples, ConversionError, ParameterError};
#[cfg(feature = "transforms")]
use crate::{AudioSampleError, AudioTypeConversion, ProcessingError};
#[cfg(feature = "transforms")]
use num_complex::Complex;
#[cfg(feature = "transforms")]
use non_empty_slice::NonEmptySlice;
use ndarray::Axis;
use non_empty_slice::NonEmptyVec;
#[cfg(feature = "transforms")]
use spectrograms::FftPlanner;
impl<T> AudioStatistics for AudioSamples<'_, T>
where
T: StandardSample,
{
#[inline]
fn peak(&self) -> T {
let zero = T::default();
let abs_max_slice = |slice: &[T]| -> T {
if let Some(result) = T::avx2_abs_max(slice) {
return result;
}
let mut acc = [zero; 4];
let chunks = slice.chunks_exact(4);
let rem = chunks.remainder();
for chunk in chunks {
for j in 0..4 {
let x = chunk[j];
let ax = if x < zero { zero - x } else { x };
if ax > acc[j] {
acc[j] = ax;
}
}
}
for &x in rem {
let ax = if x < zero { zero - x } else { x };
if ax > acc[0] {
acc[0] = ax;
}
}
acc.iter().fold(zero, |a, &b| if b > a { b } else { a })
};
let fold_ndarray = |acc: T, &x: &T| {
let ax = if x < zero { zero - x } else { x };
if ax > acc { ax } else { acc }
};
match &self.data {
AudioData::Mono(arr) => match arr.as_slice() {
Some(s) => abs_max_slice(s),
None => arr.fold(zero, fold_ndarray),
},
AudioData::Multi(arr) => match arr.as_slice() {
Some(s) => abs_max_slice(s),
None => arr.fold(zero, fold_ndarray),
},
}
}
#[inline]
fn min_sample(&self) -> T {
match &self.data {
AudioData::Mono(arr) => {
arr.fold(arr[0], |acc, &x| if x < acc { x } else { acc })
}
AudioData::Multi(arr) => {
arr.fold(arr[[0, 0]], |acc, &x| if x < acc { x } else { acc })
}
}
}
#[inline]
fn max_sample(&self) -> T {
match &self.data {
AudioData::Mono(arr) => {
arr.fold(arr[0], |acc, &x| if x > acc { x } else { acc })
}
AudioData::Multi(arr) => {
arr.fold(arr[[0, 0]], |acc, &x| if x > acc { x } else { acc })
}
}
}
#[inline]
fn mean(&self) -> f64 {
match &self.data {
AudioData::Mono(mono_data) => mono_data.mean().cast_into(),
AudioData::Multi(multi_data) => multi_data.mean().cast_into(),
}
}
#[inline]
fn median(&self) -> Option<f64> {
let mono = self.as_mono()?;
let mono_len = mono.len().get();
Some(if mono_len.is_multiple_of(2) {
let first_idx = (mono_len / 2) - 1;
let first_val = self[first_idx];
let second_val = self[first_idx + 1];
let sum: f64 = (first_val + second_val).cast_into();
sum / 2.0
} else {
self[self.len().get() / 2].cast_into()
})
}
#[inline]
fn rms(&self) -> f64 {
let sum_sq_slice = |slice: &[T]| -> f64 {
let mut acc = [0.0f64; 4];
let chunks = slice.chunks_exact(4);
let rem = chunks.remainder();
for chunk in chunks {
let f0: f64 = chunk[0].cast_into();
let f1: f64 = chunk[1].cast_into();
let f2: f64 = chunk[2].cast_into();
let f3: f64 = chunk[3].cast_into();
acc[0] += f0 * f0;
acc[1] += f1 * f1;
acc[2] += f2 * f2;
acc[3] += f3 * f3;
}
for &x in rem {
let f: f64 = x.cast_into();
acc[0] += f * f;
}
acc[0] + acc[1] + acc[2] + acc[3]
};
let (sum_sq, n) = match &self.data {
AudioData::Mono(arr) => {
let s = match arr.as_slice() {
Some(s) => sum_sq_slice(s),
None => arr
.iter()
.map(|&x| {
let f: f64 = x.cast_into();
f * f
})
.sum(),
};
(s, arr.len().get())
}
AudioData::Multi(arr) => {
let s = match arr.as_slice() {
Some(s) => sum_sq_slice(s),
None => arr
.iter()
.map(|&x| {
let f: f64 = x.cast_into();
f * f
})
.sum(),
};
(s, arr.len().get())
}
};
(sum_sq / n as f64).sqrt()
}
#[inline]
fn rms_and_peak(&self) -> (f64, T) {
let zero = T::default();
let combined_slice = |slice: &[T]| -> (f64, T) {
let mut sq = [0.0f64; 8];
let mut pk = [zero; 8];
let chunks = slice.chunks_exact(8);
let rem = chunks.remainder();
for chunk in chunks {
let x0 = chunk[0];
let x1 = chunk[1];
let x2 = chunk[2];
let x3 = chunk[3];
let x4 = chunk[4];
let x5 = chunk[5];
let x6 = chunk[6];
let x7 = chunk[7];
let f0: f64 = x0.cast_into();
sq[0] += f0 * f0;
let f1: f64 = x1.cast_into();
sq[1] += f1 * f1;
let f2: f64 = x2.cast_into();
sq[2] += f2 * f2;
let f3: f64 = x3.cast_into();
sq[3] += f3 * f3;
let f4: f64 = x4.cast_into();
sq[4] += f4 * f4;
let f5: f64 = x5.cast_into();
sq[5] += f5 * f5;
let f6: f64 = x6.cast_into();
sq[6] += f6 * f6;
let f7: f64 = x7.cast_into();
sq[7] += f7 * f7;
let ax0 = if x0 < zero { zero - x0 } else { x0 };
if ax0 > pk[0] {
pk[0] = ax0;
}
let ax1 = if x1 < zero { zero - x1 } else { x1 };
if ax1 > pk[1] {
pk[1] = ax1;
}
let ax2 = if x2 < zero { zero - x2 } else { x2 };
if ax2 > pk[2] {
pk[2] = ax2;
}
let ax3 = if x3 < zero { zero - x3 } else { x3 };
if ax3 > pk[3] {
pk[3] = ax3;
}
let ax4 = if x4 < zero { zero - x4 } else { x4 };
if ax4 > pk[4] {
pk[4] = ax4;
}
let ax5 = if x5 < zero { zero - x5 } else { x5 };
if ax5 > pk[5] {
pk[5] = ax5;
}
let ax6 = if x6 < zero { zero - x6 } else { x6 };
if ax6 > pk[6] {
pk[6] = ax6;
}
let ax7 = if x7 < zero { zero - x7 } else { x7 };
if ax7 > pk[7] {
pk[7] = ax7;
}
}
for &x in rem {
let f: f64 = x.cast_into();
sq[0] += f * f;
let ax = if x < zero { zero - x } else { x };
if ax > pk[0] {
pk[0] = ax;
}
}
let sum_sq = sq[0] + sq[1] + sq[2] + sq[3] + sq[4] + sq[5] + sq[6] + sq[7];
let peak = pk.iter().fold(zero, |a, &b| if b > a { b } else { a });
(sum_sq, peak)
};
let (sum_sq, peak, n) = match &self.data {
AudioData::Mono(arr) => {
let (sq, pk) = match arr.as_slice() {
Some(s) => combined_slice(s),
None => {
let sq = arr
.iter()
.map(|&x| {
let f: f64 = x.cast_into();
f * f
})
.sum();
let pk = arr.fold(zero, |acc, &x| {
let ax = if x < zero { zero - x } else { x };
if ax > acc { ax } else { acc }
});
(sq, pk)
}
};
(sq, pk, arr.len().get())
}
AudioData::Multi(arr) => {
let (sq, pk) = match arr.as_slice() {
Some(s) => combined_slice(s),
None => {
let sq = arr
.iter()
.map(|&x| {
let f: f64 = x.cast_into();
f * f
})
.sum();
let pk = arr.fold(zero, |acc, &x| {
let ax = if x < zero { zero - x } else { x };
if ax > acc { ax } else { acc }
});
(sq, pk)
}
};
(sq, pk, arr.len().get())
}
};
((sum_sq / n as f64).sqrt(), peak)
}
#[inline]
fn variance(&self) -> f64 {
match &self.data {
AudioData::Mono(mono_data) => mono_data.variance(),
AudioData::Multi(multi_data) => multi_data
.variance_axis(Axis(0))
.mean()
.expect("Non empty data will produce a mean"),
}
}
#[inline]
fn std_dev(&self) -> f64 {
self.variance().sqrt()
}
#[inline]
fn zero_crossings(&self) -> usize {
match &self.data {
AudioData::Mono(arr) => {
if arr.len() < nzu!(2) {
return 0;
}
let mut crossings = 0;
for i in 1..arr.len().get() {
let prev: T = arr[i - 1];
let curr: T = arr[i];
if (prev > T::zero() && curr <= T::zero())
|| (prev <= T::zero() && curr > T::zero())
{
crossings += 1;
}
}
crossings
}
AudioData::Multi(arr) => {
let mut total_crossings = 0;
for channel in arr.axis_iter(Axis(0)) {
if channel.len() < 2 {
continue;
}
for i in 1..channel.len() {
let prev: T = channel[i - 1];
let curr: T = channel[i];
if (prev > T::zero() && curr <= T::zero())
|| (prev <= T::zero() && curr > T::zero())
{
total_crossings += 1;
}
}
}
total_crossings
}
}
}
#[inline]
fn zero_crossing_rate(&self) -> f64 {
let crossings = self.zero_crossings() as f64;
let duration_seconds = self.duration_seconds(); crossings / duration_seconds
}
#[cfg(feature = "transforms")]
#[inline]
fn autocorrelation(&self, max_lag: NonZeroUsize) -> Option<NonEmptyVec<f64>> {
let max_lag = max_lag.get();
let (signal, n) = match &self.data {
AudioData::Mono(arr) => {
let n = arr.len().get();
let sig: Vec<f64> = arr.iter().map(|&x| x.cast_into()).collect();
(sig, n)
}
AudioData::Multi(arr) => {
let first_channel = arr.row(0);
let n = first_channel.len();
let sig: Vec<f64> = first_channel.iter().map(|&x| x.cast_into()).collect();
(sig, n)
}
};
let effective_max_lag = max_lag.min(n - 1);
let fft_size = (2 * n - 1).next_power_of_two();
let mut padded: Vec<f64> = Vec::with_capacity(fft_size);
padded.extend_from_slice(&signal);
padded.resize(fft_size, 0.0);
let mut planner = FftPlanner::new();
let padded_slice = unsafe { NonEmptySlice::new_unchecked(&padded[..]) };
let fft_size_nz =
unsafe { NonZeroUsize::new_unchecked(fft_size) };
let spectrum = planner.fft(padded_slice, fft_size_nz).ok()?;
let power: Vec<Complex<f64>> = spectrum
.iter()
.map(|c| Complex {
re: c.norm_sqr(),
im: 0.0,
})
.collect();
let power_slice = unsafe { NonEmptySlice::new_unchecked(&power[..]) };
let raw = planner.irfft(power_slice, fft_size_nz).ok()?;
let fft_size_f = fft_size as f64;
let mut correlations: Vec<f64> = Vec::with_capacity(effective_max_lag + 1);
for lag in 0..=effective_max_lag {
let overlap_count = (n - lag) as f64;
correlations.push(raw[lag] / fft_size_f / overlap_count);
}
Some(unsafe { NonEmptyVec::new_unchecked(correlations) })
}
#[inline]
fn cross_correlation(
&self,
other: &Self,
max_lag: NonZeroUsize,
) -> AudioSampleResult<NonEmptyVec<f64>> {
if self.num_channels() != other.num_channels() {
return Err(crate::AudioSampleError::Parameter(
ParameterError::invalid_value(
"channels",
"Signals must have the same number of channels for cross-correlation",
),
));
}
match (&self.data, &other.data) {
(AudioData::Mono(arr1), AudioData::Mono(arr2)) => {
let n1 = arr1.len();
let n2 = arr2.len();
let effective_max_lag: usize = max_lag.get().min(n1.get().min(n2.get()) - 1);
let correlations = Vec::with_capacity(effective_max_lag + 1);
let mut correlations = unsafe { NonEmptyVec::new_unchecked(correlations) };
for lag in 0..=effective_max_lag {
let mut correlation: T = T::zero();
let count = n1.get().min(n2.get() - lag);
for i in 0..count {
let s1 = arr1[i];
let s2 = arr2[i + lag];
correlation += s1 * s2;
}
let mut correlation = correlation.cast_into();
correlation /= count as f64;
correlations.push(correlation);
}
Ok(correlations)
}
(AudioData::Multi(arr1), AudioData::Multi(arr2)) => {
let ch1 = arr1.row(0);
let ch2 = arr2.row(0);
let n1 = ch1.len();
let n2 = ch2.len();
let effective_max_lag: usize = max_lag.get().min(n1.min(n2 - 1));
let correlations = Vec::with_capacity(effective_max_lag + 1);
let mut correlations = unsafe { NonEmptyVec::new_unchecked(correlations) };
for lag in 0..=effective_max_lag {
let mut correlation = T::zero();
let count = n1.min(n2 - lag);
for i in 0..count {
let s1 = ch1[i];
let s2 = ch2[i + lag];
correlation += s1 * s2;
}
let mut correlation: f64 = correlation.cast_into();
correlation /= count as f64;
correlations.push(correlation);
}
Ok(correlations)
}
_ => {
Err(crate::AudioSampleError::Conversion(
ConversionError::audio_conversion(
"Mixed",
"cross_correlation",
"compatible",
"Cannot correlate mono and multi-channel signals",
),
))
}
}
}
#[cfg(feature = "transforms")]
#[inline]
fn spectral_centroid(&self) -> AudioSampleResult<f64> {
if self.is_multi_channel() {
return Err(crate::AudioSampleError::Parameter(
ParameterError::invalid_value(
"channels",
"spectral_centroid is only defined for mono signals",
),
));
}
let working_samples = self.to_format::<f64>();
let working_slice = working_samples.as_slice().expect(
"Mono audio means a 1d array which is contiguous, which means as_slice cannot fail",
);
let working_samples = unsafe { NonEmptySlice::new_unchecked(working_slice) };
let mut planner = FftPlanner::new();
let n = self.len();
let fft_output_size = n.div_ceil(nzu!(2)).checked_add(1).expect(
"n is non-zero since self is non-empty by design and n/2 is always << usize::MAX, which means n/2 + 1 << usize::MAX and cannot overflow",
);
let power_spectrum = planner.power_spectrum(working_samples, n, None)?;
let nyquist = self.nyquist();
let freq_step = nyquist / (fft_output_size.get() - 1) as f64;
let (weighted_sum, total_energy) = power_spectrum.iter().enumerate().fold(
(0.0, 0.0),
|(mut w_sum, mut t_energy), (i, &power)| {
let frequency = i as f64 * freq_step;
w_sum += frequency * power;
t_energy += power;
(w_sum, t_energy)
},
);
if total_energy > 0.0 {
Ok(weighted_sum / total_energy)
} else if total_energy == 0.0 {
Ok(0.0)
} else {
Err(AudioSampleError::Processing(
ProcessingError::MathematicalFailure {
operation: "spectral_centroid".to_string(),
reason: format!(
"Total spectral energy is negative, cannot compute centroid: total_energy={total_energy},weighted_sum={weighted_sum}"
),
},
))
}
}
#[cfg(feature = "transforms")]
#[inline]
fn spectral_rolloff(&self, rolloff_percent: f64) -> AudioSampleResult<f64> {
if rolloff_percent <= 0.0 || rolloff_percent >= 1.0 {
return Err(crate::AudioSampleError::Parameter(
ParameterError::out_of_range(
"rolloff_percent",
rolloff_percent.to_string(),
"0.0",
"1.0",
"rolloff_percent must be between 0.0 and 1.0",
),
));
}
match &self.data {
AudioData::Mono(arr) => {
let n = arr.len();
let input: NonEmptyVec<f64> = unsafe {
NonEmptyVec::new_unchecked(
arr.mapv(super::super::traits::ConvertTo::convert_to)
.to_vec(),
)
};
let mut planner = FftPlanner::new();
let power_spectrum = planner.power_spectrum(input.as_non_empty_slice(), n, None)?;
let nyquist = self.nyquist();
let freq_step = nyquist / (power_spectrum.len().get() as f64 - 1.0);
let total_energy: f64 = power_spectrum.iter().fold(0.0, |acc, &x| acc + x);
if total_energy == 0.0 {
return Ok(0.0);
} else if total_energy < 0.0 {
return Err(AudioSampleError::Processing(ProcessingError::MathematicalFailure { operation: "spectral_rolloff".to_string(), reason: "Total spectral energy is negative, cannot compute rolloff frequency".to_string()}));
}
let target_energy = total_energy * rolloff_percent;
let mut cumulative_energy = 0.0;
for (i, &power) in power_spectrum.iter().enumerate() {
cumulative_energy += power;
if cumulative_energy >= target_energy {
let frequency = i as f64 * freq_step;
return Ok(frequency);
}
}
Ok(nyquist)
}
AudioData::Multi(arr) => {
let first_channel = arr.row(0);
let n = first_channel.len();
let n = unsafe { NonZeroUsize::new_unchecked(n) };
let input: NonEmptyVec<f64> = unsafe {
NonEmptyVec::new_unchecked(
first_channel.iter().map(|&x| x.convert_to()).collect(),
)
};
let mut planner = FftPlanner::new();
let power_spectrum = planner.power_spectrum(input.as_non_empty_slice(), n, None)?;
let nyquist = self.nyquist();
let freq_step = nyquist / (power_spectrum.len().get() as f64 - 1.0);
let total_energy: f64 = power_spectrum.iter().fold(0.0, |acc, &x| acc + x);
if total_energy == 0.0 {
return Ok(0.0);
} else if total_energy < 0.0 {
return Err(AudioSampleError::Processing(ProcessingError::MathematicalFailure { operation: "spectral_rolloff".to_string(), reason: "Total spectral energy is negative, cannot compute rolloff frequency".to_string()}));
}
let target_energy = total_energy * rolloff_percent;
let mut cumulative_energy = 0.0;
for (i, &power) in power_spectrum.iter().enumerate() {
cumulative_energy += power;
if cumulative_energy >= target_energy {
let frequency = i as f64 * freq_step;
return Ok(frequency);
}
}
Ok(nyquist)
}
}
}
}
#[cfg(test)]
mod tests {
#[cfg(feature = "transforms")]
use std::time::Duration;
use super::*;
use crate::sample_rate;
use approx_eq::assert_approx_eq;
use ndarray::{Array1, array};
#[test]
fn test_peak_min_max_existing_methods() {
let data = array![-3.0f32, -1.0, 0.0, 2.0, 4.0];
let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
assert_eq!(audio.peak(), 4.0);
assert_eq!(audio.min_sample(), -3.0);
assert_eq!(audio.max_sample(), 4.0);
}
#[test]
fn test_rms_computation() {
let data = array![1.0f32, -1.0, 1.0, -1.0];
let audio: AudioSamples<'static, f32> =
AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
let rms = audio.rms(); assert_approx_eq!(rms, 1.0, 1e-6);
}
#[test]
fn test_variance_and_std_dev() {
let data = array![1.0f32, 2.0, 3.0, 4.0, 5.0];
let audio: AudioSamples<'_, f32> =
AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
let variance = audio.variance();
let std_dev = audio.std_dev();
assert_approx_eq!(variance, 2.0, 1e-6);
assert_approx_eq!(std_dev, 2.0_f64.sqrt(), 1e-6);
}
#[test]
fn test_zero_crossings() {
let data = array![1.0f32, -1.0, 1.0, -1.0, 1.0];
let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
let crossings = audio.zero_crossings();
assert_eq!(crossings, 4);
}
#[test]
fn test_zero_crossing_rate() {
let sample_rate = 44100;
let duration = 1.0; let freq = 4.0;
let n_samples = (sample_rate as f64 * duration) as usize;
let mut data = Vec::with_capacity(n_samples);
for i in 0..n_samples {
let t = i as f64 / sample_rate as f64;
let phase = 2.0 * std::f64::consts::PI * freq * t;
let value = if phase.sin() >= 0.0 { 1.0 } else { -1.0 };
data.push(value);
}
let audio = AudioSamples::new_mono(Array1::from(data).into(), sample_rate!(44100)).unwrap();
let zcr = audio.zero_crossing_rate();
assert!(
(zcr - 8.0f64).abs() <= 1.0,
"Expected ~8 crossings/sec, got {}",
zcr
);
}
#[test]
#[cfg(feature = "transforms")]
fn test_autocorrelation() {
let data = array![1.0f32, 0.0, -1.0, 0.0];
let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
let autocorr = audio.autocorrelation(crate::nzu!(2)).unwrap();
assert_eq!(autocorr.len(), crate::nzu!(3));
assert!(autocorr[0] >= autocorr[1]);
assert!(autocorr[0] >= autocorr[2]);
}
#[test]
#[cfg(feature = "transforms")]
fn test_cross_correlation() {
let data1 = array![1.0f32, 0.0, -1.0];
let data2 = array![1.0f32, 0.0, -1.0]; let audio1 = AudioSamples::new_mono(data1.into(), sample_rate!(44100)).unwrap();
let audio2 = AudioSamples::new_mono(data2.into(), sample_rate!(44100)).unwrap();
let cross_corr = audio1.cross_correlation(&audio2, crate::nzu!(1)).unwrap();
let autocorr = audio1.autocorrelation(crate::nzu!(1)).unwrap();
assert_eq!(cross_corr.len(), autocorr.len());
}
#[test]
fn test_multi_channel_statistics() {
let data = array![[1.0f32, 2.0], [-1.0, 1.0]]; let audio = AudioSamples::new_multi_channel(data.into(), sample_rate!(44100)).unwrap();
let rms: f64 = audio.rms();
let variance = audio.variance();
let crossings = audio.zero_crossings();
assert!(rms > 0.0);
assert!(variance >= 0.0);
assert_eq!(crossings, 1); }
#[test]
fn test_edge_cases() {
let single_data = array![1.0f32];
let single_audio = AudioSamples::new_mono(single_data.into(), sample_rate!(44100)).unwrap();
assert_eq!(single_audio.zero_crossings(), 0);
assert_eq!(
single_audio.rms(),
1.0,
"RMS of single sample should be the sample itself"
);
}
#[test]
fn test_empty_audio_rejected() {
let empty_data: Array1<f32> = Array1::from(vec![]);
let empty_audio = AudioSamples::new_mono(empty_data.into(), sample_rate!(44100));
assert!(empty_audio.is_err(), "Empty audio should not be created");
}
#[test]
#[cfg(feature = "transforms")]
fn test_spectral_centroid() {
let sample_rate = sample_rate!(44100);
let duration = Duration::from_secs_f32(1.0); let freq = 1000.0;
let audio = crate::sine_wave::<f64>(freq, duration, sample_rate, 0.5);
let centroid = audio
.spectral_centroid()
.expect("Failed to compute spectral centroid");
assert!(
(centroid - freq).abs() < 50.0,
"Centroid {} should be close to {}",
centroid,
freq
);
}
#[test]
#[cfg(all(feature = "transforms", feature = "random-generation"))]
fn test_spectral_rolloff() {
let sample_rate = sample_rate!(8000); let duration = Duration::from_secs_f32(1.0);
let audio = crate::white_noise::<f64>(duration, sample_rate, 0.5, None);
let rolloff = audio
.spectral_rolloff(0.85)
.expect("Failed to compute spectral rolloff");
let nyquist = audio.nyquist();
assert!(rolloff > 0.0, "Rolloff should be positive");
assert!(
rolloff <= nyquist,
"Rolloff should not exceed Nyquist frequency"
);
}
#[test]
#[cfg(feature = "transforms")]
fn test_spectral_rolloff_validation() {
let data = array![1.0f32, -1.0, 1.0];
let audio = AudioSamples::new_mono(data, sample_rate!(44100)).unwrap();
assert!(audio.spectral_rolloff(0.0).is_err());
assert!(audio.spectral_rolloff(1.0).is_err());
assert!(audio.spectral_rolloff(-0.1).is_err());
assert!(audio.spectral_rolloff(1.1).is_err());
assert!(audio.spectral_rolloff(0.85).is_ok());
}
}