use std::collections::VecDeque;
use oxideav_celt::encoder::{CeltEncoder, FRAME_SAMPLES, SAMPLE_RATE};
use oxideav_codec::Encoder;
use oxideav_core::{
AudioFrame, CodecId, CodecParameters, Error, Frame, Packet, Result, SampleFormat, TimeBase,
};
/// Opus configuration number for CELT-only, fullband, 20 ms frames.
const OPUS_CONFIG_CELT_FB_20MS: u8 = 31;

/// Builds the one-byte Opus TOC header for a packet holding a single CELT fullband 20 ms frame.
///
/// Bit layout, most significant first: the 5-bit configuration number (always 31 here),
/// the stereo flag taken from `stereo`, and a 2-bit frame-count code that is left at 0.
pub fn build_toc_byte(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_CELT_FB_20MS << 3;
    let stereo_bits = u8::from(stereo) << 2;
    config_bits | stereo_bits
}
/// Number of samples covered by one emitted Opus packet.
///
/// Used as both the packet duration and the pts increment, expressed in the
/// `1 / SAMPLE_RATE` time base that the encoder stamps on its packets.
pub const OPUS_FRAME_SAMPLES: usize = 960;
/// Opus encoder that wraps a mono CELT encoder and prefixes each CELT payload with a TOC byte.
///
/// Input with more than one channel is averaged down to mono before it reaches CELT.
pub struct OpusEncoder {
/// Parameters reported through `output_params()`; also the source of `codec_id()`.
out_params: CodecParameters,
/// Channel count every incoming frame must match.
input_channels: u16,
/// Inner CELT encoder, configured for one channel at `SAMPLE_RATE`.
celt: CeltEncoder,
/// Finished packets waiting to be handed out by `receive_packet()`.
output: VecDeque<Packet>,
/// Pts assigned to the next packet; advanced by `OPUS_FRAME_SAMPLES` per packet.
pts_counter: i64,
}
impl OpusEncoder {
pub fn new(params: &CodecParameters) -> Result<Self> {
let channels = params.channels.unwrap_or(1);
if channels == 0 || channels > 2 {
return Err(Error::unsupported(format!(
"opus encoder: only mono/stereo supported, got {channels}-channel input"
)));
}
let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
if sr != SAMPLE_RATE {
return Err(Error::unsupported(format!(
"opus encoder: input must be 48 kHz (got {sr}); resample before encoding"
)));
}
let mut celt_params = params.clone();
celt_params.channels = Some(1);
celt_params.sample_rate = Some(SAMPLE_RATE);
celt_params.codec_id = CodecId::new(oxideav_celt::CODEC_ID_STR);
let celt = CeltEncoder::new(&celt_params)?;
let mut out_params = params.clone();
out_params.sample_rate = Some(SAMPLE_RATE);
out_params.channels = Some(channels);
Ok(Self {
out_params,
input_channels: channels,
celt,
output: VecDeque::new(),
pts_counter: 0,
})
}
pub fn new_celt_only_full_band(params: &CodecParameters) -> Result<Self> {
let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
if sr != SAMPLE_RATE {
return Err(Error::unsupported(format!(
"opus encoder (CELT-only FB): input must be 48 kHz, got {sr}"
)));
}
Self::new(params)
}
fn drain_celt(&mut self) -> Result<()> {
let toc = build_toc_byte(false);
loop {
match self.celt.receive_packet() {
Ok(celt_pkt) => {
let mut data = Vec::with_capacity(1 + celt_pkt.data.len());
data.push(toc);
data.extend_from_slice(&celt_pkt.data);
let tb = TimeBase::new(1, SAMPLE_RATE as i64);
let pts = self.pts_counter;
self.pts_counter += OPUS_FRAME_SAMPLES as i64;
let pkt = Packet::new(0, tb, data)
.with_pts(pts)
.with_duration(OPUS_FRAME_SAMPLES as i64);
self.output.push_back(pkt);
}
Err(Error::NeedMore) => return Ok(()),
Err(e) => return Err(e),
}
}
}
}
impl Encoder for OpusEncoder {
fn codec_id(&self) -> &CodecId {
&self.out_params.codec_id
}
fn output_params(&self) -> &CodecParameters {
&self.out_params
}
fn send_frame(&mut self, frame: &Frame) -> Result<()> {
let audio = match frame {
Frame::Audio(a) => a,
_ => {
return Err(Error::invalid(
"opus encoder: expected audio frame, got video",
))
}
};
if audio.sample_rate != SAMPLE_RATE {
return Err(Error::unsupported(format!(
"opus encoder: input must be 48 kHz (got {}); resample before encoding",
audio.sample_rate
)));
}
if audio.channels != self.input_channels {
return Err(Error::invalid(format!(
"opus encoder: frame channels ({}) differ from configured input channels ({})",
audio.channels, self.input_channels
)));
}
let mono = extract_mono_f32(audio)?;
let mut bytes = Vec::with_capacity(mono.len() * 4);
for &s in &mono {
bytes.extend_from_slice(&s.to_le_bytes());
}
let celt_frame = Frame::Audio(AudioFrame {
format: SampleFormat::F32,
channels: 1,
sample_rate: SAMPLE_RATE,
samples: mono.len() as u32,
pts: audio.pts,
time_base: TimeBase::new(1, SAMPLE_RATE as i64),
data: vec![bytes],
});
self.celt.send_frame(&celt_frame)?;
self.drain_celt()
}
fn receive_packet(&mut self) -> Result<Packet> {
if let Some(p) = self.output.pop_front() {
Ok(p)
} else {
Err(Error::NeedMore)
}
}
fn flush(&mut self) -> Result<()> {
self.celt.flush()?;
self.drain_celt()?;
Ok(())
}
}
/// Downmixes an audio frame to mono `f32` samples by averaging all channels of each sample.
///
/// Interleaved formats (`S16`, `F32`) are read from the first data plane; planar formats
/// (`S16P`, `F32P`) read one plane per channel. 16-bit samples are scaled by `1 / 32768`.
/// On success the returned vector holds exactly `audio.samples` values.
///
/// All length validation happens before any output is allocated, so a frame whose declared
/// sample count does not match its buffers is rejected without indexing out of bounds.
///
/// # Errors
///
/// * `Error::invalid` when the frame declares zero channels, when an interleaved buffer or a
///   planar plane is shorter than the declared sample count (a missing interleaved plane is
///   treated as an empty one), or when a planar frame has fewer planes than channels.
/// * `Error::unsupported` for any other sample format.
fn extract_mono_f32(audio: &AudioFrame) -> Result<Vec<f32>> {
    // Referenced only so the `FRAME_SAMPLES` import stays in use.
    let _ = FRAME_SAMPLES;

    let n = audio.samples as usize;
    let ch = audio.channels as usize;
    if ch == 0 {
        return Err(Error::invalid("opus encoder: 0-channel audio frame"));
    }

    // Interleaved formats keep everything in plane 0. A frame without any plane is handled
    // as an empty buffer so the length checks below report it instead of panicking.
    let interleaved: &[u8] = match audio.data.first() {
        Some(plane) => &plane[..],
        None => &[],
    };

    let mono: Vec<f32> = match audio.format {
        SampleFormat::S16 => {
            let stride = ch * 2;
            // `checked_mul` guards against wrap-around on targets with a narrow `usize`.
            let bytes = match n.checked_mul(stride) {
                Some(needed) if interleaved.len() >= needed => &interleaved[..needed],
                _ => {
                    return Err(Error::invalid(
                        "opus encoder: S16 input shorter than declared sample count",
                    ))
                }
            };
            let scale = ch as f32 * 32768.0;
            bytes
                .chunks_exact(stride)
                .map(|frame| {
                    let acc: i32 = frame
                        .chunks_exact(2)
                        .map(|b| i32::from(i16::from_le_bytes([b[0], b[1]])))
                        .sum();
                    acc as f32 / scale
                })
                .collect()
        }
        SampleFormat::S16P => {
            if audio.data.len() < ch {
                return Err(Error::invalid("opus encoder: S16P input missing planes"));
            }
            let planes = &audio.data[..ch];
            let plane_bytes = n.checked_mul(2);
            // Validate every plane once up front rather than once per sample.
            if planes
                .iter()
                .any(|p| plane_bytes.map_or(true, |needed| p.len() < needed))
            {
                return Err(Error::invalid(
                    "opus encoder: S16P plane shorter than declared sample count",
                ));
            }
            let scale = ch as f32 * 32768.0;
            (0..n)
                .map(|i| {
                    let off = i * 2;
                    let acc: i32 = planes
                        .iter()
                        .map(|p| i32::from(i16::from_le_bytes([p[off], p[off + 1]])))
                        .sum();
                    acc as f32 / scale
                })
                .collect()
        }
        SampleFormat::F32 => {
            let stride = ch * 4;
            let bytes = match n.checked_mul(stride) {
                Some(needed) if interleaved.len() >= needed => &interleaved[..needed],
                _ => {
                    return Err(Error::invalid(
                        "opus encoder: F32 input shorter than declared sample count",
                    ))
                }
            };
            let divisor = ch as f32;
            bytes
                .chunks_exact(stride)
                .map(|frame| {
                    // Explicit left-to-right fold from +0.0 keeps the summation order fixed.
                    let acc = frame.chunks_exact(4).fold(0f32, |acc, b| {
                        acc + f32::from_le_bytes([b[0], b[1], b[2], b[3]])
                    });
                    acc / divisor
                })
                .collect()
        }
        SampleFormat::F32P => {
            if audio.data.len() < ch {
                return Err(Error::invalid("opus encoder: F32P input missing planes"));
            }
            let planes = &audio.data[..ch];
            let plane_bytes = n.checked_mul(4);
            if planes
                .iter()
                .any(|p| plane_bytes.map_or(true, |needed| p.len() < needed))
            {
                return Err(Error::invalid(
                    "opus encoder: F32P plane shorter than declared sample count",
                ));
            }
            let divisor = ch as f32;
            (0..n)
                .map(|i| {
                    let off = i * 4;
                    let acc = planes.iter().fold(0f32, |acc, p| {
                        acc + f32::from_le_bytes([p[off], p[off + 1], p[off + 2], p[off + 3]])
                    });
                    acc / divisor
                })
                .collect()
        }
        other => {
            return Err(Error::unsupported(format!(
                "opus encoder: sample format {:?} not supported (use S16 / S16P / F32 / F32P)",
                other
            )));
        }
    };
    Ok(mono)
}
/// Factory returning a boxed [`OpusEncoder`] built from `params`.
///
/// Any error from [`OpusEncoder::new`] is returned unchanged.
pub fn make_encoder(params: &CodecParameters) -> Result<Box<dyn Encoder>> {
    let encoder = OpusEncoder::new(params)?;
    Ok(Box::new(encoder))
}
/// Opus configuration number placed in the top five bits of the SILK TOC byte
/// (SILK-only, narrowband, 20 ms frames).
pub const OPUS_CONFIG_SILK_NB_20MS: u8 = 1;
/// Samples per SILK frame at the internal narrowband rate (`SILK_NB_RATE`).
pub const SILK_NB_FRAME_SAMPLES_INTERNAL: usize = 160;
/// Length of one SILK packet counted in 48 kHz samples; used as packet duration and pts step.
pub const SILK_FRAME_SAMPLES_48K: usize = 960;
/// Internal SILK narrowband sample rate in Hz.
pub const SILK_NB_RATE: u32 = 8_000;
/// Integer decimation factor from `SAMPLE_RATE` down to `SILK_NB_RATE`.
const DOWNSAMPLE_RATIO: usize = (SAMPLE_RATE / SILK_NB_RATE) as usize;
/// Builds the one-byte Opus TOC header for a packet holding a single SILK narrowband 20 ms frame.
///
/// Bit layout, most significant first: the 5-bit configuration number
/// (`OPUS_CONFIG_SILK_NB_20MS`), the stereo flag taken from `stereo`, and a 2-bit
/// frame-count code that is left at 0.
pub fn build_silk_nb_20ms_toc(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_SILK_NB_20MS << 3;
    let stereo_bits = u8::from(stereo) << 2;
    config_bits | stereo_bits
}
pub struct SilkEncoder {
out_params: CodecParameters,
silk: crate::silk::encoder::SilkFrameEncoder,
pending_internal: VecDeque<f32>,
input_channels: u16,
input_sample_rate: u32,
output: VecDeque<Packet>,
pts_counter: i64,
}
impl SilkEncoder {
pub fn new_nb_mono_20ms(params: &CodecParameters) -> Result<Self> {
let channels = params.channels.unwrap_or(1);
if channels != 1 {
return Err(Error::unsupported(format!(
"SILK NB mono encoder: only mono input supported, got {channels} channels"
)));
}
let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
if sr != SILK_NB_RATE && sr != SAMPLE_RATE {
return Err(Error::unsupported(format!(
"SILK NB mono encoder: input must be 8 kHz or 48 kHz, got {sr} Hz"
)));
}
let mut out_params = params.clone();
out_params.sample_rate = Some(SAMPLE_RATE);
out_params.channels = Some(1);
Ok(Self {
out_params,
silk: crate::silk::encoder::SilkFrameEncoder::new_nb_20ms(),
pending_internal: VecDeque::with_capacity(SILK_NB_FRAME_SAMPLES_INTERNAL * 2),
input_channels: 1,
input_sample_rate: sr,
output: VecDeque::new(),
pts_counter: 0,
})
}
fn drain_frames(&mut self) -> Result<()> {
while self.pending_internal.len() >= SILK_NB_FRAME_SAMPLES_INTERNAL {
let mut frame = Vec::with_capacity(SILK_NB_FRAME_SAMPLES_INTERNAL);
for _ in 0..SILK_NB_FRAME_SAMPLES_INTERNAL {
frame.push(self.pending_internal.pop_front().unwrap_or(0.0));
}
let pkt = self.encode_one_frame(&frame)?;
self.output.push_back(pkt);
}
Ok(())
}
fn encode_one_frame(&mut self, pcm_internal: &[f32]) -> Result<Packet> {
debug_assert_eq!(pcm_internal.len(), SILK_NB_FRAME_SAMPLES_INTERNAL);
let mut re = oxideav_celt::range_encoder::RangeEncoder::new(384);
re.encode_bit_logp(true, 1); re.encode_bit_logp(false, 1);
self.silk.encode_frame_body(pcm_internal, &mut re)?;
let body = re
.done()
.map_err(|e| Error::other(format!("SILK encoder: {e}")))?;
let body = strip_trailing_zeros(body);
let toc = build_silk_nb_20ms_toc(false);
let mut data = Vec::with_capacity(1 + body.len());
data.push(toc);
data.extend_from_slice(&body);
let tb = TimeBase::new(1, SAMPLE_RATE as i64);
let pts = self.pts_counter;
self.pts_counter += SILK_FRAME_SAMPLES_48K as i64;
Ok(Packet::new(0, tb, data)
.with_pts(pts)
.with_duration(SILK_FRAME_SAMPLES_48K as i64))
}
}
fn strip_trailing_zeros(mut v: Vec<u8>) -> Vec<u8> {
while v.len() > 1 && v.last() == Some(&0) {
v.pop();
}
v
}
impl Encoder for SilkEncoder {
fn codec_id(&self) -> &CodecId {
&self.out_params.codec_id
}
fn output_params(&self) -> &CodecParameters {
&self.out_params
}
fn send_frame(&mut self, frame: &Frame) -> Result<()> {
let audio = match frame {
Frame::Audio(a) => a,
_ => {
return Err(Error::invalid(
"SILK encoder: expected audio frame, got video",
))
}
};
if audio.channels != self.input_channels {
return Err(Error::invalid(format!(
"SILK encoder: frame channels ({}) differ from configured input channels ({})",
audio.channels, self.input_channels
)));
}
if audio.sample_rate != self.input_sample_rate {
return Err(Error::unsupported(format!(
"SILK encoder: input sample rate ({}) differs from configured rate ({}); reconfigure or resample first",
audio.sample_rate, self.input_sample_rate
)));
}
let mono = extract_mono_f32(audio)?;
let internal_samples: Vec<f32> = if audio.sample_rate == SILK_NB_RATE {
mono
} else {
downsample_box(&mono, DOWNSAMPLE_RATIO)
};
self.pending_internal.extend(&internal_samples);
self.drain_frames()
}
fn receive_packet(&mut self) -> Result<Packet> {
if let Some(p) = self.output.pop_front() {
Ok(p)
} else {
Err(Error::NeedMore)
}
}
fn flush(&mut self) -> Result<()> {
if !self.pending_internal.is_empty() {
while self.pending_internal.len() % SILK_NB_FRAME_SAMPLES_INTERNAL != 0 {
self.pending_internal.push_back(0.0);
}
self.drain_frames()?;
}
Ok(())
}
}
/// Decimates `input` by an integer `ratio` using a box filter: each output sample is the
/// mean of `ratio` consecutive input samples.
///
/// A `ratio` of 0 or 1 returns a copy of the input. Trailing samples that do not fill a
/// complete group of `ratio` are not represented in the output.
fn downsample_box(input: &[f32], ratio: usize) -> Vec<f32> {
    if ratio <= 1 {
        return input.to_vec();
    }
    let divisor = ratio as f32;
    input
        .chunks_exact(ratio)
        .map(|group| group.iter().fold(0f32, |sum, &sample| sum + sample) / divisor)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Opus codec parameters with an explicit channel count and sample rate.
    fn opus_params(channels: u16, sample_rate: u32) -> CodecParameters {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(channels);
        p.sample_rate = Some(sample_rate);
        p
    }

    /// Panics unless `outcome` is `Err(Error::Unsupported(_))`.
    fn expect_unsupported<T>(outcome: Result<T>) {
        match outcome {
            Err(Error::Unsupported(_)) => {}
            Err(e) => panic!("expected Unsupported, got {e:?}"),
            Ok(_) => panic!("expected Unsupported, got Ok"),
        }
    }

    #[test]
    fn toc_byte_mono() {
        let toc = build_toc_byte(false);
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!((toc >> 2) & 1, 0, "stereo bit should be 0");
        assert_eq!(toc & 0x3, 0, "framing code should be 0");
    }

    #[test]
    fn toc_byte_stereo() {
        let toc = build_toc_byte(true);
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!((toc >> 2) & 1, 1, "stereo bit should be 1");
        assert_eq!(toc & 0x3, 0, "framing code should be 0");
    }

    #[test]
    fn rejects_non_48k() {
        expect_unsupported(OpusEncoder::new(&opus_params(1, 44_100)));
    }

    #[test]
    fn rejects_more_than_stereo() {
        expect_unsupported(OpusEncoder::new(&opus_params(6, SAMPLE_RATE)));
    }

    #[test]
    fn new_celt_only_fb_accepts_48k_mono() {
        let p = opus_params(1, SAMPLE_RATE);
        assert!(OpusEncoder::new_celt_only_full_band(&p).is_ok());
    }

    #[test]
    fn new_celt_only_fb_rejects_non_48k() {
        expect_unsupported(OpusEncoder::new_celt_only_full_band(&opus_params(1, 16_000)));
    }

    #[test]
    fn mono_encoder_produces_toc_byte() {
        let mut enc = OpusEncoder::new(&opus_params(1, SAMPLE_RATE)).unwrap();

        // One full frame of 16-bit silence.
        let silence = vec![0u8; OPUS_FRAME_SAMPLES * 2];
        let frame = Frame::Audio(AudioFrame {
            format: SampleFormat::S16,
            channels: 1,
            sample_rate: SAMPLE_RATE,
            samples: OPUS_FRAME_SAMPLES as u32,
            pts: None,
            time_base: TimeBase::new(1, SAMPLE_RATE as i64),
            data: vec![silence],
        });
        enc.send_frame(&frame).unwrap();

        let pkt = enc.receive_packet().unwrap();
        assert!(!pkt.data.is_empty(), "packet must contain TOC + bitstream");
        let toc = pkt.data[0];
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!((toc >> 2) & 1, 0, "mono → stereo bit = 0");
        assert_eq!(toc & 0x3, 0, "single-frame packet → code 0");
    }
}