use std::num::{NonZeroU32, NonZeroUsize};
use non_empty_slice::{NonEmptySlice, NonEmptyVec};
use spectrograms::{MdctParams, WindowType};
use crate::traits::AudioTypeConversion;
use crate::{AudioSampleError, AudioSampleResult, AudioSamples, ParameterError, StandardSample};
pub mod bands;
pub mod codec;
pub mod masking;
pub mod quantization;
pub mod stereo;
mod traits;
pub use codec::{AudioCodec, EncodedSegment, PerceptualCodec, PerceptualEncodedAudio};
pub use masking::{apply_temporal_masking, detect_transient_windows};
pub use stereo::{StereoPerceptualCodec, StereoPerceptualEncodedAudio};
pub use traits::AudioPerceptualAnalysis;
/// A contiguous range of frequency bins treated as a single band.
///
/// [`Band::width`] is computed as `end_bin - start_bin`, so `end_bin` acts as
/// an exclusive upper bound for that calculation.
#[derive(Debug, Clone, PartialEq)]
pub struct Band {
    /// Index of the first bin in the band.
    pub start_bin: usize,
    /// Upper bin bound; [`Band::try_new`] requires it to exceed `start_bin`.
    pub end_bin: usize,
    /// Centre frequency of the band (presumably in Hz; confirm against the
    /// code that builds band layouts).
    pub centre_frequency: f32,
    /// Position of the band on a perceptual scale. The scale itself is not
    /// defined in this file; confirm against the layout builders.
    pub perceptual_position: f32,
}

impl Band {
    /// Creates a band after checking that the bin range is non-empty.
    ///
    /// # Errors
    ///
    /// Returns [`AudioSampleError::Parameter`] when `end_bin <= start_bin`.
    ///
    /// No bare `#[must_use]` here: the returned `Result` is already
    /// `#[must_use]`, and the other fallible functions in this file follow
    /// the same convention.
    #[inline]
    pub fn try_new(
        start_bin: usize,
        end_bin: usize,
        centre_frequency: f32,
        perceptual_position: f32,
    ) -> AudioSampleResult<Self> {
        if end_bin > start_bin {
            Ok(Self {
                start_bin,
                end_bin,
                centre_frequency,
                perceptual_position,
            })
        } else {
            Err(AudioSampleError::Parameter(ParameterError::invalid_value(
                "end_bin",
                format!("end_bin ({end_bin}) must be greater than start_bin ({start_bin})"),
            )))
        }
    }

    /// Creates a band without validating the bin range.
    ///
    /// # Safety
    ///
    /// The body performs no validation at all; it only stores the arguments.
    /// Callers should uphold the invariant that [`Band::try_new`] checks,
    /// namely `end_bin > start_bin`. Code elsewhere may rely on that
    /// invariant (not verifiable from this file). [`Band::width`] itself
    /// tolerates a violated invariant by saturating to zero.
    #[inline]
    #[must_use]
    pub const unsafe fn new(
        start_bin: usize,
        end_bin: usize,
        centre_frequency: f32,
        perceptual_position: f32,
    ) -> Self {
        Self {
            start_bin,
            end_bin,
            centre_frequency,
            perceptual_position,
        }
    }

    /// Number of bins spanned by the band (`end_bin - start_bin`).
    ///
    /// Saturates to `0` instead of underflowing if `end_bin < start_bin`,
    /// which is reachable because the fields are public and [`Band::new`]
    /// skips validation.
    #[inline]
    #[must_use]
    pub const fn width(&self) -> usize {
        self.end_bin.saturating_sub(self.start_bin)
    }
}
/// An owned, non-empty sequence of [`Band`]s.
#[derive(Debug, Clone, PartialEq)]
pub struct BandLayout {
    /// The bands, in the order they were supplied to [`BandLayout::new`].
    pub bands: NonEmptyVec<Band>,
}

impl BandLayout {
    /// Builds a layout by copying the given bands into an owned vector.
    #[inline]
    #[must_use]
    pub fn new(bands: &NonEmptySlice<Band>) -> Self {
        let bands = bands.to_non_empty_vec();
        Self { bands }
    }

    /// Number of bands in the layout; never zero because the storage is
    /// non-empty by construction.
    #[inline]
    #[must_use]
    pub fn len(&self) -> NonZeroUsize {
        self.bands.len()
    }

    /// Borrows the bands as a non-empty slice.
    #[inline]
    #[must_use]
    pub fn as_slice(&self) -> &NonEmptySlice<Band> {
        self.bands.as_non_empty_slice()
    }
}
/// Per-band measurements attached to a single [`Band`].
///
/// The values are stored exactly as given. Their units and how they are
/// derived are defined by the code that fills them in (presumably
/// `masking::compute_band_metrics`; confirm there) and are not visible in
/// this file.
#[derive(Debug, Clone, PartialEq)]
pub struct BandMetric {
    /// The band these measurements refer to.
    pub band: Band,
    /// Energy measured for the band.
    pub energy: f32,
    /// Masking threshold associated with the band.
    pub masking_threshold: f32,
    /// Signal-to-mask ratio for the band.
    pub signal_to_mask_ratio: f32,
    /// Importance score assigned to the band.
    pub importance: f32,
    /// Amount of noise considered allowable in the band.
    pub allowed_noise: f32,
}

impl BandMetric {
    /// Bundles the supplied values into a `BandMetric` without any
    /// validation or transformation.
    #[inline]
    #[must_use]
    pub const fn new(
        band: Band,
        energy: f32,
        masking_threshold: f32,
        signal_to_mask_ratio: f32,
        importance: f32,
        allowed_noise: f32,
    ) -> Self {
        let metric = Self {
            band,
            energy,
            masking_threshold,
            signal_to_mask_ratio,
            importance,
            allowed_noise,
        };
        metric
    }
}
/// An owned, non-empty sequence of [`BandMetric`] values.
#[derive(Debug, Clone, PartialEq)]
pub struct BandMetrics {
    /// The metrics, in the order they were supplied to [`BandMetrics::new`].
    pub metrics: NonEmptyVec<BandMetric>,
}

impl BandMetrics {
    /// Builds the collection by copying the given metrics into an owned
    /// vector.
    #[inline]
    #[must_use]
    pub fn new(metrics: &NonEmptySlice<BandMetric>) -> Self {
        let metrics = metrics.to_non_empty_vec();
        Self { metrics }
    }

    /// Number of stored metrics; never zero because the storage is
    /// non-empty by construction.
    #[inline]
    #[must_use]
    pub fn len(&self) -> NonZeroUsize {
        self.metrics.len()
    }

    /// Borrows the metrics as a non-empty slice.
    #[inline]
    #[must_use]
    pub fn as_slice(&self) -> &NonEmptySlice<BandMetric> {
        self.metrics.as_non_empty_slice()
    }
}
/// Tunable parameters for the psychoacoustic model.
///
/// The numeric fields are stored verbatim; how they are interpreted (units,
/// valid ranges) is decided by the masking code that consumes this
/// configuration, which is not visible in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct PsychoacousticConfig {
    /// Noise-floor parameter (the presets use -80, -60 and -40; presumably
    /// decibels, confirm against the masking code).
    pub noise_floor: f32,
    /// Masking gain parameter.
    pub masking_gain: f32,
    /// Noise-masking gain parameter.
    pub noise_masking_gain: f32,
    /// Upward spreading parameter.
    pub upward_spread: f32,
    /// Downward spreading parameter.
    pub downward_spread: f32,
    /// One weight per band; its length is what [`Self::band_count`] reports.
    pub perceptual_weights: NonEmptyVec<f32>,
    /// Small constant (the presets use `1e-10`); presumably guards against
    /// division by zero or `log(0)`, confirm against the masking code.
    pub epsilon: f32,
}

impl PsychoacousticConfig {
    /// Creates a configuration from explicit values, copying the weights
    /// into an owned vector. No validation is performed.
    #[inline]
    #[must_use]
    pub fn new(
        noise_floor: f32,
        masking_gain: f32,
        noise_masking_gain: f32,
        upward_spread: f32,
        downward_spread: f32,
        perceptual_weights: &NonEmptySlice<f32>,
        epsilon: f32,
    ) -> Self {
        let perceptual_weights = perceptual_weights.to_non_empty_vec();
        Self {
            noise_floor,
            masking_gain,
            noise_masking_gain,
            upward_spread,
            downward_spread,
            perceptual_weights,
            epsilon,
        }
    }

    /// Number of bands this configuration describes, i.e. the number of
    /// perceptual weights.
    #[inline]
    #[must_use]
    pub fn band_count(&self) -> NonZeroUsize {
        self.perceptual_weights.len()
    }

    /// Returns `true` when the number of weights equals the number of bands
    /// in `band_layout`.
    #[inline]
    #[must_use]
    pub fn is_compatible_with(&self, band_layout: &BandLayout) -> bool {
        band_layout.len() == self.band_count()
    }

    /// Preset named `mpeg1`: noise floor -60, gains 14.5 / 5.5,
    /// spreads 25 / 6, epsilon `1e-10`.
    #[inline]
    #[must_use]
    pub fn mpeg1(perceptual_weights: &NonEmptySlice<f32>) -> Self {
        Self {
            noise_floor: -60.0,
            masking_gain: 14.5,
            noise_masking_gain: 5.5,
            upward_spread: 25.0,
            downward_spread: 6.0,
            perceptual_weights: perceptual_weights.to_non_empty_vec(),
            epsilon: 1e-10,
        }
    }

    /// Preset named `conservative`: noise floor -80, gains 10 / 3,
    /// spreads 20 / 4, epsilon `1e-10`.
    #[inline]
    #[must_use]
    pub fn conservative(perceptual_weights: &NonEmptySlice<f32>) -> Self {
        Self {
            noise_floor: -80.0,
            masking_gain: 10.0,
            noise_masking_gain: 3.0,
            upward_spread: 20.0,
            downward_spread: 4.0,
            perceptual_weights: perceptual_weights.to_non_empty_vec(),
            epsilon: 1e-10,
        }
    }

    /// Preset named `aggressive`: noise floor -40, gains 18 / 7,
    /// spreads 30 / 8, epsilon `1e-10`.
    #[inline]
    #[must_use]
    pub fn aggressive(perceptual_weights: &NonEmptySlice<f32>) -> Self {
        Self {
            noise_floor: -40.0,
            masking_gain: 18.0,
            noise_masking_gain: 7.0,
            upward_spread: 30.0,
            downward_spread: 8.0,
            perceptual_weights: perceptual_weights.to_non_empty_vec(),
            epsilon: 1e-10,
        }
    }

    /// Produces `n_bands` weights, all equal to `1.0`.
    ///
    /// # Panics
    ///
    /// Cannot panic in practice: `n_bands` is non-zero, so the vector handed
    /// to `NonEmptyVec::new` always has at least one element.
    #[inline]
    #[must_use]
    pub fn uniform_weights(n_bands: NonZeroUsize) -> NonEmptyVec<f32> {
        let ones: Vec<f32> = std::iter::repeat(1.0_f32).take(n_bands.get()).collect();
        NonEmptyVec::new(ones).expect("n_bands >= 1")
    }
}
/// Everything produced by one run of the perceptual analysis.
///
/// As filled in by `analyse_signal_with_window_size`, the coefficient buffer
/// is the flattened MDCT matrix, and `reconstruct_signal` reshapes such a
/// buffer as an `(n_coefficients, n_frames)` matrix.
#[derive(Debug, Clone)]
pub struct PerceptualAnalysisResult {
    /// Flattened MDCT coefficients (`n_coefficients * n_frames` values).
    pub coefficients: NonEmptyVec<f32>,
    /// Per-bin energy: the mean of the squared coefficients across frames.
    pub bin_energies: NonEmptyVec<f32>,
    /// Per-band metrics computed from `bin_energies`.
    pub band_metrics: BandMetrics,
    /// Number of MDCT coefficients (bins) per frame.
    pub n_coefficients: NonZeroUsize,
    /// Number of MDCT frames.
    pub n_frames: NonZeroUsize,
    /// Samples per channel of the analysed signal.
    pub original_length: usize,
    /// Sample rate of the analysed signal.
    pub sample_rate: NonZeroU32,
    /// MDCT parameters used for the forward transform.
    pub mdct_params: MdctParams,
}

impl PerceptualAnalysisResult {
    /// Bundles the supplied values into a result without validating that
    /// the buffer lengths agree with `n_coefficients` and `n_frames`.
    #[inline]
    #[must_use]
    pub fn new(
        coefficients: NonEmptyVec<f32>,
        bin_energies: NonEmptyVec<f32>,
        band_metrics: BandMetrics,
        n_coefficients: NonZeroUsize,
        n_frames: NonZeroUsize,
        original_length: usize,
        sample_rate: NonZeroU32,
        mdct_params: MdctParams,
    ) -> Self {
        let result = Self {
            coefficients,
            bin_energies,
            band_metrics,
            n_coefficients,
            n_frames,
            original_length,
            sample_rate,
            mdct_params,
        };
        result
    }
}
pub fn analyse_signal<T>(
signal: &AudioSamples<T>,
window: WindowType,
band_layout: &BandLayout,
config: &PsychoacousticConfig,
) -> AudioSampleResult<PerceptualAnalysisResult>
where
T: StandardSample,
{
analyse_signal_with_window_size(signal, window, None, band_layout, config)
}
/// Runs the MDCT-based perceptual analysis on a mono signal.
///
/// The signal is converted to `f32` and transformed with an MDCT whose hop is
/// half the window. The per-bin mean squared coefficient across frames is
/// then passed to `masking::compute_band_metrics`.
///
/// # Window size
///
/// * `Some(ws)`: `ws` must be even and at least 4.
/// * `None`: `min(2048, samples_per_channel)` rounded down to an even number;
///   the signal must therefore hold at least 4 samples.
///
/// # Errors
///
/// * [`AudioSampleError::Parameter`] when `config` and `band_layout` disagree
///   on the band count, when the signal is not mono, when an explicit window
///   size is odd or below 4, or when the signal is too short for the default
///   window.
/// * [`AudioSampleError::EmptyData`] when the MDCT yields no frames or no
///   coefficients, or when the converted channel holds no samples.
/// * Any error reported by `MdctParams::new` or `spectrograms::mdct_f32`.
///
/// # Panics
///
/// Panics if the mono channel cannot be viewed as a contiguous slice, or if
/// the MDCT parameters report zero coefficients for a window of at least 4
/// samples; both are treated as internal invariants.
pub fn analyse_signal_with_window_size<T>(
    signal: &AudioSamples<T>,
    window: WindowType,
    window_size: Option<NonZeroUsize>,
    band_layout: &BandLayout,
    config: &PsychoacousticConfig,
) -> AudioSampleResult<PerceptualAnalysisResult>
where
    T: StandardSample,
{
    if !config.is_compatible_with(band_layout) {
        return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
            "config",
            format!(
                "PsychoacousticConfig has {} weights but BandLayout has {} bands",
                config.band_count(),
                band_layout.len(),
            ),
        )));
    }
    if signal.num_channels().get() != 1 {
        return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
            "signal",
            "psychoacoustic analysis requires mono input; mix down or extract a channel first",
        )));
    }

    let n_samples = signal.samples_per_channel().get();

    // Resolve the window size; each path validates its own result.
    let window_size_val = match window_size {
        Some(ws) => {
            let ws = ws.get();
            if ws < 4 {
                return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
                    "window_size",
                    "window size must be at least 4",
                )));
            }
            if ws % 2 != 0 {
                return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
                    "window_size",
                    "window size must be even",
                )));
            }
            ws
        }
        None => {
            // Cap at 2048 and round down to the nearest even length.
            let raw = 2048_usize.min(n_samples);
            let even = raw - raw % 2;
            if even < 4 {
                return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
                    "signal",
                    format!(
                        "signal too short for psychoacoustic analysis: {n_samples} samples (minimum 4)"
                    ),
                )));
            }
            even
        }
    };

    let window_size = NonZeroUsize::new(window_size_val).expect("validated >= 4");
    let hop_size = NonZeroUsize::new(window_size_val / 2).expect("window_size >= 4");
    let mdct_params = MdctParams::new(window_size, hop_size, window)?;
    let n_bins = mdct_params.n_coefficients();
    let n_coefficients = NonZeroUsize::new(n_bins).expect("n_bins >= 1 for window_size >= 4");
    let sample_rate = signal.sample_rate();

    let signal_f32 = signal.to_format::<f32>();
    let channel = signal_f32.channels().next().expect("validated mono above");
    let samples: &[f32] = channel
        .as_slice()
        .expect("mono channel is always contiguous");
    // Guard locally so the unchecked construction below is sound regardless
    // of what the conversion returned.
    if samples.is_empty() {
        return Err(AudioSampleError::EmptyData);
    }
    // SAFETY: `samples` was checked to be non-empty on the line above.
    let samples_ne = unsafe { NonEmptySlice::new_unchecked(samples) };

    let mdct_matrix = spectrograms::mdct_f32(samples_ne, &mdct_params)?;

    // Reject a frame-less result before dividing by the frame count; without
    // this check the energies would be NaN and construction would panic.
    let n_frames = NonZeroUsize::new(mdct_matrix.ncols()).ok_or(AudioSampleError::EmptyData)?;
    let frame_total = n_frames.get();

    let coefficients_vec: Vec<f32> = mdct_matrix.iter().copied().collect();
    let coefficients =
        NonEmptyVec::new(coefficients_vec).map_err(|_| AudioSampleError::EmptyData)?;

    // Mean squared coefficient per bin, averaged over all frames.
    let bin_energies_vec: Vec<f32> = (0..n_bins)
        .map(|k| {
            let sum: f32 = (0..frame_total).map(|f| mdct_matrix[(k, f)].powi(2)).sum();
            sum / frame_total as f32
        })
        .collect();
    let bin_energies =
        NonEmptyVec::new(bin_energies_vec).expect("n_bins >= 1 for window_size >= 4");

    let band_metrics = masking::compute_band_metrics(
        bin_energies.as_non_empty_slice().as_slice(),
        band_layout,
        config,
        n_bins,
    );

    Ok(PerceptualAnalysisResult::new(
        coefficients,
        bin_energies,
        band_metrics,
        n_coefficients,
        n_frames,
        // The signal is only borrowed, so its length is still `n_samples`.
        n_samples,
        sample_rate,
        mdct_params,
    ))
}
/// Rebuilds a mono `f32` signal from flattened MDCT coefficients.
///
/// `coefficients` is reshaped into an `(n_coefficients, n_frames)` matrix in
/// the order accepted by `Array2::from_shape_vec`, then passed to
/// `spectrograms::imdct_f32` together with `params` and `original_length`.
///
/// # Errors
///
/// * [`AudioSampleError::Parameter`] when `n_coefficients * n_frames`
///   overflows `usize` or does not equal `coefficients.len()`.
/// * [`AudioSampleError::EmptyData`] when the inverse transform yields no
///   samples.
/// * Any error reported by `Array2::from_shape_vec` or
///   `spectrograms::imdct_f32`.
pub fn reconstruct_signal(
    coefficients: &NonEmptyVec<f32>,
    n_coefficients: NonZeroUsize,
    n_frames: NonZeroUsize,
    params: &MdctParams,
    original_length: Option<usize>,
    sample_rate: NonZeroU32,
) -> AudioSampleResult<AudioSamples<'static, f32>> {
    use ndarray::Array2;

    let nc = n_coefficients.get();
    let nf = n_frames.get();

    // A plain `nc * nf` panics in debug builds and wraps silently in release
    // builds. A product that does not fit in `usize` can never match a real
    // buffer length, so report it as an invalid parameter.
    let expected = nc.checked_mul(nf).ok_or_else(|| {
        AudioSampleError::Parameter(ParameterError::invalid_value(
            "coefficients",
            format!("coefficient count {nc} × {nf} overflows usize"),
        ))
    })?;

    if coefficients.len().get() != expected {
        return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
            "coefficients",
            format!(
                "expected {nc} × {nf} = {expected} elements, got {}",
                coefficients.len()
            ),
        )));
    }

    let coef_vec: Vec<f32> = coefficients.iter().copied().collect();
    let coef_matrix = Array2::from_shape_vec((nc, nf), coef_vec)?;
    let samples = spectrograms::imdct_f32(&coef_matrix, params, original_length)?;
    let samples_ne = NonEmptyVec::new(samples).map_err(|_| AudioSampleError::EmptyData)?;
    Ok(AudioSamples::from_mono_vec(samples_ne, sample_rate))
}