use super::{AudioError, AudioResult};
use std::f32::consts::PI;
/// Summary of out-of-range samples, as filled in by [`detect_clipping`].
#[derive(Debug, Clone, PartialEq)]
pub struct ClippingReport {
/// Number of samples strictly greater than `1.0`.
pub positive_clipped: usize,
/// Number of samples strictly less than `-1.0`.
pub negative_clipped: usize,
/// Largest sample seen (`0.0` when the input was empty).
pub max_value: f32,
/// Smallest sample seen (`0.0` when the input was empty).
pub min_value: f32,
/// Number of samples that were scanned.
pub total_samples: usize,
/// `true` when at least one sample was counted as clipped.
pub has_clipping: bool,
}
impl ClippingReport {
    /// Share of the scanned samples that were clipped, in percent.
    ///
    /// Both positive and negative clips are counted. A report with
    /// `total_samples == 0` yields `0.0` instead of dividing by zero.
    #[must_use]
    pub fn clipping_percentage(&self) -> f32 {
        match self.total_samples {
            0 => 0.0,
            total => {
                let clipped_count = self.positive_clipped + self.negative_clipped;
                let fraction = clipped_count as f32 / total as f32;
                fraction * 100.0
            }
        }
    }
}
/// Scans `samples` and reports how many fall outside `[-1.0, 1.0]`.
///
/// A sample counts as clipped only when it is strictly above `1.0` or strictly
/// below `-1.0`. The report also carries the largest and smallest sample seen.
///
/// Empty input produces an all-zero report. NaN samples fail every comparison,
/// so they are neither counted nor tracked as extremes; if every sample is NaN
/// the extremes stay at their `NEG_INFINITY` / `INFINITY` starting values.
#[must_use]
pub fn detect_clipping(samples: &[f32]) -> ClippingReport {
    let (positive_clipped, negative_clipped, max_value, min_value) = if samples.is_empty() {
        (0, 0, 0.0, 0.0)
    } else {
        // Single pass carrying (count above 1.0, count below -1.0, peak, trough).
        samples.iter().fold(
            (0_usize, 0_usize, f32::NEG_INFINITY, f32::INFINITY),
            |(over, under, peak, trough), &value| {
                (
                    over + usize::from(value > 1.0),
                    under + usize::from(value < -1.0),
                    // Explicit comparisons (not `f32::max`/`min`) keep the
                    // first-seen value on ties and skip NaN.
                    if value > peak { value } else { peak },
                    if value < trough { value } else { trough },
                )
            },
        )
    };

    ClippingReport {
        positive_clipped,
        negative_clipped,
        max_value,
        min_value,
        total_samples: samples.len(),
        has_clipping: positive_clipped != 0 || negative_clipped != 0,
    }
}
/// Returns `true` when at least one sample in `samples` is NaN.
///
/// An empty slice contains no NaN and therefore yields `false`.
#[must_use]
pub fn has_nan(samples: &[f32]) -> bool {
    samples.iter().copied().any(f32::is_nan)
}
/// Returns `true` when at least one sample in `samples` is positive or
/// negative infinity.
///
/// NaN is not infinite, and an empty slice yields `false`.
#[must_use]
pub fn has_inf(samples: &[f32]) -> bool {
    samples.iter().copied().any(f32::is_infinite)
}
/// Downmixes a buffer of consecutive sample pairs to mono by averaging each pair.
///
/// Every two consecutive values become `(a + b) / 2.0`. If the input has an odd
/// length, the unpaired final value is appended unchanged. Empty input gives an
/// empty vector.
#[must_use]
pub fn stereo_to_mono(stereo: &[f32]) -> Vec<f32> {
    let frames = stereo.chunks_exact(2);
    // At most one trailing value is left over when the length is odd.
    let leftover = frames.remainder().first().copied();

    frames
        .map(|frame| (frame[0] + frame[1]) / 2.0)
        .chain(leftover)
        .collect()
}
/// Checks that `samples` is usable audio and reports the first problem found.
///
/// The checks run in this order: empty input, NaN values, infinite values, and
/// finally samples outside `[-1.0, 1.0]` as reported by [`detect_clipping`].
///
/// # Errors
///
/// Returns [`AudioError::InvalidParameters`] with a descriptive message for the
/// first check that fails.
pub fn validate_audio(samples: &[f32]) -> AudioResult<()> {
    // Pick the message of the first failing check, or `None` when all pass.
    let problem = if samples.is_empty() {
        Some("Audio cannot be empty".to_string())
    } else if has_nan(samples) {
        Some("Audio contains NaN values".to_string())
    } else if has_inf(samples) {
        Some("Audio contains Infinity values".to_string())
    } else {
        let report = detect_clipping(samples);
        report.has_clipping.then(|| {
            format!(
                "Audio clipping detected: {} samples exceed ±1.0 (max={:.3}, min={:.3}). \
                 Normalize audio to [-1.0, 1.0] range.",
                report.positive_clipped + report.negative_clipped,
                report.max_value,
                report.min_value
            )
        })
    };

    match problem {
        Some(message) => Err(AudioError::InvalidParameters(message)),
        None => Ok(()),
    }
}
/// Parameters for a mel-spectrogram front end.
///
/// NOTE(review): the code that consumes these fields is outside this section,
/// so units and exact semantics below are hedged where they are not evident.
#[derive(Debug, Clone)]
pub struct MelConfig {
/// Number of mel bands.
pub n_mels: usize,
/// FFT size; [`MelConfig::n_freqs`] derives `n_fft / 2 + 1` bins from it.
pub n_fft: usize,
/// Hop length — presumably the stride in samples between frames; confirm at the call sites.
pub hop_length: usize,
/// Sample rate (the presets use 16000 and 22050, so presumably Hz).
pub sample_rate: u32,
/// Lower frequency bound (presumably Hz).
pub fmin: f32,
/// Upper frequency bound (presumably Hz; the presets use half the sample rate).
pub fmax: f32,
/// Padding flag — presumably enables centered padding of the signal; confirm in the consumer.
pub center_pad: bool,
}
impl MelConfig {
#[must_use]
pub fn whisper() -> Self {
Self {
n_mels: 80,
n_fft: 400,
hop_length: 160,
sample_rate: 16000,
fmin: 0.0,
fmax: 8000.0,
center_pad: true,
}
}
#[must_use]
pub fn tts() -> Self {
Self {
n_mels: 80,
n_fft: 1024,
hop_length: 256,
sample_rate: 22050,
fmin: 0.0,
fmax: 11025.0,
center_pad: false,
}
}
#[must_use]
pub fn custom(
n_mels: usize,
n_fft: usize,
hop_length: usize,
sample_rate: u32,
fmin: f32,
fmax: f32,
center_pad: bool,
) -> Self {
Self {
n_mels,
n_fft,
hop_length,
sample_rate,
fmin,
fmax,
center_pad,
}
}
#[must_use]
pub fn n_freqs(&self) -> usize {
self.n_fft / 2 + 1
}
}
impl Default for MelConfig {
fn default() -> Self {
Self::whisper()
}
}
/// Mel filterbank state built for one [`MelConfig`].
///
/// NOTE(review): no methods for this type appear in this section; they are
/// presumably defined in the `filterbank.rs` file included at the end of this
/// file. Field descriptions below are inferred from names and types — confirm there.
#[derive(Debug, Clone)]
pub struct MelFilterbank {
// Configuration this filterbank belongs to.
config: MelConfig,
// Presumably the dense filter weights in one flat buffer — TODO confirm layout.
filters: Vec<f32>,
// Presumably the frequency-bin count (`n_fft / 2 + 1`) — TODO confirm.
n_freqs: usize,
// Presumably the analysis window applied to each frame — TODO confirm type and length.
window: Vec<f32>,
// Presumably a compact per-band form of `filters` — see `SparseFilter`.
sparse_filters: Vec<SparseFilter>,
}
/// A run of filter weights together with a starting index.
///
/// NOTE(review): the code that builds and reads this type is not in this
/// section; the field meanings below are assumptions to confirm.
#[derive(Clone, Debug)]
struct SparseFilter {
// Presumably the index of the first frequency bin covered by `values`.
start: usize,
// Presumably the weights for consecutive bins beginning at `start`.
values: Vec<f32>,
}
/// FFT plan plus preallocated work buffers, created by `FftScratch::new`.
struct FftScratch {
// Forward FFT plan for length `n_fft`.
fft: std::sync::Arc<dyn rustfft::Fft<f32>>,
// Complex buffer of `n_fft` elements, zero-initialised.
fft_buf: Vec<rustfft::num_complex::Complex<f32>>,
// Scratch space sized by the plan's `get_inplace_scratch_len()`.
fft_scratch: Vec<rustfft::num_complex::Complex<f32>>,
// Real-valued buffer of `n_freqs` elements, zero-initialised.
power_spec: Vec<f32>,
}
impl FftScratch {
    /// Plans a forward FFT of length `n_fft` and allocates zeroed buffers for it.
    ///
    /// `fft_buf` holds `n_fft` complex values, `fft_scratch` is sized to what the
    /// plan reports via `get_inplace_scratch_len()`, and `power_spec` holds
    /// `n_freqs` floats.
    ///
    /// NOTE(review): `n_freqs` is presumably `n_fft / 2 + 1`; this constructor
    /// does not check that relationship.
    fn new(n_fft: usize, n_freqs: usize) -> Self {
        use rustfft::num_complex::Complex;
        use rustfft::FftPlanner;

        let zero = Complex::new(0.0_f32, 0.0_f32);
        let fft = FftPlanner::<f32>::new().plan_fft_forward(n_fft);
        let fft_scratch = vec![zero; fft.get_inplace_scratch_len()];

        Self {
            fft_buf: vec![zero; n_fft],
            fft_scratch,
            power_spec: vec![0.0; n_freqs],
            fft,
        }
    }
}
// Textually includes `filterbank.rs` at this point, so the items it defines
// become part of this module and can use the private types declared above.
include!("filterbank.rs");