use std::num::{NonZeroU32, NonZeroUsize};
use non_empty_slice::NonEmptyVec;
use spectrograms::WindowType;
use crate::codecs::perceptual::codec::AudioCodec;
use crate::codecs::perceptual::{BandLayout, PsychoacousticConfig};
use crate::traits::AudioTypeConversion;
use crate::{AudioSampleError, AudioSampleResult, AudioSamples, ParameterError, StandardSample};
use super::celt::{CeltEncodedFrame, celt_decode_frame, celt_encode_frame};
use super::hybrid::{HybridEncodedFrame, hybrid_decode_frame, hybrid_encode_frame};
use super::mode::{OpusConfig, OpusMode, detect_mode};
use super::silk::{
SilkEncodedFrame, SilkState, silk_decode_frame_stateful, silk_encode_frame_stateful,
};
/// Mode-specific payload of a single encoded frame.
///
/// Each variant wraps the encoded-frame type produced by the corresponding
/// frame encoder (SILK, CELT, or hybrid).
#[derive(Clone, Debug)]
pub enum OpusFrameData {
    /// Payload produced by the stateful SILK frame encoder.
    Silk(SilkEncodedFrame),
    /// Payload produced by the CELT frame encoder.
    Celt(CeltEncodedFrame),
    /// Payload produced by the hybrid frame encoder.
    Hybrid(HybridEncodedFrame),
}
/// One encoded frame together with the metadata recorded at encode time.
#[derive(Clone, Debug)]
pub struct OpusEncodedFrame {
    /// Coding mode that was used for this frame.
    pub mode: OpusMode,
    /// Mode-specific encoded payload.
    pub data: OpusFrameData,
    /// Number of input samples this frame was encoded from.
    pub n_samples: usize,
}
/// Complete encoded signal: an ordered, non-empty list of frames plus the
/// information needed to rebuild an `AudioSamples` value on decode.
#[derive(Clone, Debug)]
pub struct OpusEncodedAudio {
    /// Encoded frames, in the order they were produced by the encoder.
    pub frames: NonEmptyVec<OpusEncodedFrame>,
    /// Samples per channel of the original input; decoded output is truncated
    /// to at most this many samples.
    pub original_length: usize,
    /// Sample rate of the original input.
    pub sample_rate: NonZeroU32,
}
/// Codec front-end that splits mono audio into frames and encodes each frame
/// with the SILK, CELT, or hybrid coder.
#[derive(Clone, Debug)]
pub struct OpusCodec {
    /// Codec settings (frame duration, optional forced mode, bandwidth,
    /// bit budget, minimum bits per band).
    pub config: OpusConfig,
    /// Explicit band layout; when `None`, one is built with
    /// `BandLayout::celt` at encode time.
    pub band_layout: Option<BandLayout>,
    /// Explicit psychoacoustic configuration; when `None`, one is built from
    /// uniform weights at encode time.
    pub psych_config: Option<PsychoacousticConfig>,
    /// Window type handed to the CELT and hybrid frame encoders.
    pub window: WindowType,
}
impl OpusCodec {
    /// Creates a codec with no explicit band layout or psychoacoustic
    /// configuration; both are left as `None` and resolved during encoding.
    #[inline]
    #[must_use]
    pub fn new(config: OpusConfig, window: WindowType) -> Self {
        let (band_layout, psych_config) = (None, None);
        Self {
            config,
            band_layout,
            psych_config,
            window,
        }
    }

    /// Creates a codec that uses the supplied band layout and psychoacoustic
    /// configuration instead of the defaults resolved during encoding.
    #[inline]
    #[must_use]
    pub fn with_perceptual_config(
        config: OpusConfig,
        window: WindowType,
        band_layout: BandLayout,
        psych_config: PsychoacousticConfig,
    ) -> Self {
        // Start from the plain constructor, then fill in the perceptual overrides.
        let mut codec = Self::new(config, window);
        codec.band_layout = Some(band_layout);
        codec.psych_config = Some(psych_config);
        codec
    }
}
/// Converts a frame duration in milliseconds into a frame length in samples.
///
/// The product `sample_rate * frame_size_ms / 1000` is rounded to the nearest
/// integer, odd results are bumped up to the next even value, and the result
/// is clamped to a minimum of 4 samples.
///
/// The function never panics: NaN, zero, and negative durations yield 4, and
/// infinite or oversized durations yield the largest even `usize`.
fn compute_frame_size(sample_rate: u32, frame_size_ms: f32) -> usize {
    // Float-to-integer `as` casts saturate: NaN and negative values become 0,
    // values above the integer range (including +inf) become `usize::MAX`.
    let raw = (sample_rate as f32 * frame_size_ms / 1000.0).round() as usize;
    // Round odd counts up to the next even value. The saturating add followed
    // by clearing the low bit leaves even values untouched and cannot overflow
    // when `raw` has saturated at `usize::MAX` (which is odd).
    let even = raw.saturating_add(1) & !1;
    even.max(4)
}
/// Returns the band layout to use for encoding.
///
/// A caller-supplied layout is cloned and returned as-is; otherwise a layout
/// is built with `BandLayout::celt` from the sample rate and bin count.
fn resolve_band_layout(
    band_layout: &Option<BandLayout>,
    sample_rate: u32,
    n_bins: NonZeroUsize,
) -> BandLayout {
    match band_layout {
        Some(layout) => layout.clone(),
        None => BandLayout::celt(sample_rate as f32, n_bins),
    }
}
/// Returns the psychoacoustic configuration to use for encoding.
///
/// A caller-supplied configuration is cloned and returned as-is; otherwise
/// one is built via `PsychoacousticConfig::mpeg1` from uniform weights over
/// `n_bands` bands.
fn resolve_psych_config(
    psych_config: &Option<PsychoacousticConfig>,
    n_bands: NonZeroUsize,
) -> PsychoacousticConfig {
    match psych_config {
        Some(cfg) => cfg.clone(),
        None => {
            let uniform = PsychoacousticConfig::uniform_weights(n_bands);
            PsychoacousticConfig::mpeg1(uniform.as_non_empty_slice())
        }
    }
}
/// Frame-based encode/decode of mono audio through the SILK, CELT, and hybrid
/// frame coders.
impl AudioCodec for OpusCodec {
    type Encoded = OpusEncodedAudio;

    /// Encodes mono `audio` into a sequence of frames.
    ///
    /// The input is converted to `f32` and cut into consecutive frames of
    /// `compute_frame_size(sample_rate, config.frame_size_ms)` samples; the
    /// final frame may be shorter. Each frame uses `config.mode` when set,
    /// otherwise the mode chosen by `detect_mode`. Frames shorter than four
    /// samples are always encoded with SILK. One `SilkState` is threaded
    /// through all SILK and hybrid frames.
    ///
    /// # Errors
    ///
    /// - `AudioSampleError::Parameter` if `audio` does not have exactly one
    ///   channel.
    /// - Any error returned by the SILK, CELT, or hybrid frame encoders.
    /// - `AudioSampleError::EmptyData` if a frame or the frame list turns out
    ///   to be empty.
    ///
    /// # Panics
    ///
    /// Panics if the converted channel cannot be viewed as a contiguous slice
    /// or holds fewer samples than `audio.samples_per_channel()`.
    fn encode<T: StandardSample>(
        self,
        audio: &AudioSamples<T>,
    ) -> AudioSampleResult<Self::Encoded> {
        if audio.num_channels().get() != 1 {
            return Err(AudioSampleError::Parameter(ParameterError::invalid_value(
                "audio",
                "OpusCodec requires mono input; mix down or extract a channel first",
            )));
        }

        let sample_rate = audio.sample_rate();
        let original_length = audio.samples_per_channel().get();
        let frame_size = compute_frame_size(sample_rate.get(), self.config.frame_size_ms);
        let n_bins = NonZeroUsize::new(frame_size / 2)
            .expect("frame_size is always even and >= 4, so frame_size/2 >= 2");

        let band_layout = resolve_band_layout(&self.band_layout, sample_rate.get(), n_bins);
        let n_bands = band_layout.len();
        let psych_config = resolve_psych_config(&self.psych_config, n_bands);

        let audio_f32 = audio.to_format::<f32>();
        let channel = audio_f32
            .channels()
            .next()
            .expect("mono channel validated above");
        let all_samples: &[f32] = channel.as_slice().expect("to_format always contiguous");

        // Ceiling division written without `original_length + frame_size - 1`,
        // which can overflow when the configured frame size is very large.
        let n_frames_estimate =
            original_length / frame_size + usize::from(original_length % frame_size != 0);
        let mut frames: Vec<OpusEncodedFrame> = Vec::with_capacity(n_frames_estimate);

        // SILK state is carried across frames so stateful SILK/hybrid coding
        // sees the frames in order.
        let mut silk_state = SilkState::default();

        // `chunks` yields full frames followed by one shorter tail frame (if
        // any) and needs no manual offset arithmetic.
        for frame_samples in all_samples[..original_length].chunks(frame_size) {
            let n_samples = frame_samples.len();
            let raw_mode = self.config.mode.unwrap_or_else(|| {
                detect_mode(frame_samples, sample_rate.get(), self.config.bandwidth)
            });
            // Frames this short cannot provide the >= 4-sample window the CELT
            // path below requires, so they always fall back to SILK.
            let effective_mode = if n_samples < 4 {
                OpusMode::Silk
            } else {
                raw_mode
            };

            let data = match effective_mode {
                OpusMode::Silk => OpusFrameData::Silk(silk_encode_frame_stateful(
                    frame_samples,
                    sample_rate.get(),
                    &mut silk_state,
                )?),
                OpusMode::Celt => {
                    let ne = NonEmptyVec::new(frame_samples.to_vec()).map_err(|_| {
                        AudioSampleError::EmptyData {
                            operation: "opus encode".to_string(),
                        }
                    })?;
                    let frame_audio: AudioSamples<'static, f32> =
                        AudioSamples::from_mono_vec(ne, sample_rate);
                    // Largest even window that fits inside this frame.
                    let window_size = NonZeroUsize::new((n_samples / 2) * 2)
                        .expect("n_samples >= 4 ensures (n_samples/2)*2 >= 4");
                    OpusFrameData::Celt(celt_encode_frame(
                        &frame_audio,
                        &band_layout,
                        &psych_config,
                        self.window.clone(),
                        Some(window_size),
                        self.config.bit_budget,
                        self.config.min_bits_per_band,
                    )?)
                }
                OpusMode::Hybrid => OpusFrameData::Hybrid(hybrid_encode_frame(
                    frame_samples,
                    sample_rate.get(),
                    &band_layout,
                    &psych_config,
                    self.window.clone(),
                    self.config.bit_budget,
                    self.config.min_bits_per_band,
                    &mut silk_state,
                )?),
            };

            frames.push(OpusEncodedFrame {
                mode: effective_mode,
                data,
                n_samples,
            });
        }

        let frames_ne = NonEmptyVec::new(frames).map_err(|_| AudioSampleError::EmptyData {
            operation: "opus encode".to_string(),
        })?;
        Ok(OpusEncodedAudio {
            frames: frames_ne,
            original_length,
            sample_rate,
        })
    }

    /// Decodes `encoded` back into mono audio with sample type `U`.
    ///
    /// Frames are decoded in order with the decoder matching their payload
    /// variant. One `SilkState` is shared by all SILK and hybrid frames. The
    /// concatenated output is truncated to at most `encoded.original_length`
    /// samples and converted from `f32` to `U`.
    ///
    /// # Errors
    ///
    /// - Any error returned by the CELT or hybrid frame decoders.
    /// - `AudioSampleError::EmptyData` if decoding produced no samples.
    fn decode<U: StandardSample>(
        encoded: Self::Encoded,
    ) -> AudioSampleResult<AudioSamples<'static, U>>
    where
        f32: crate::ConvertFrom<U>,
    {
        let sample_rate = encoded.sample_rate;
        let target_length = encoded.original_length;
        let mut all_samples: Vec<f32> = Vec::with_capacity(target_length);
        let mut silk_state = SilkState::default();

        for frame in encoded.frames.into_vec() {
            let frame_samples = match frame.data {
                OpusFrameData::Silk(silk_frame) => {
                    silk_decode_frame_stateful(&silk_frame, &mut silk_state)
                }
                OpusFrameData::Celt(celt_frame) => celt_decode_frame(celt_frame, sample_rate)?,
                OpusFrameData::Hybrid(hybrid_frame) => {
                    hybrid_decode_frame(hybrid_frame, sample_rate, &mut silk_state)?
                }
            };
            all_samples.extend(frame_samples);
        }

        // Drop any samples decoded beyond the original signal length.
        all_samples.truncate(target_length);

        let samples_ne =
            NonEmptyVec::new(all_samples).map_err(|_| AudioSampleError::EmptyData {
                // Name the operation that actually failed (was "opus encode").
                operation: "opus decode".to_string(),
            })?;
        let f32_audio: AudioSamples<'static, f32> =
            AudioSamples::from_mono_vec(samples_ne, sample_rate);
        Ok(f32_audio.to_format::<U>())
    }
}