Skip to main content

codec/audio/
mod.rs

1//! Audio codec framework.
2//!
3//! Squad-24 (2026-04-17 PM5): adds the decoder/encoder traits + the wire
4//! types Squad-23 (audio mux pipeline) consumes. Decoders cover MP3 and
5//! Vorbis (mux already handles AAC/Opus/AC-3 passthrough — no decode
6//! needed for those). The encoder side currently exposes Opus only;
7//! the user decision on the audio expansion in TODO.md picked Opus over
8//! AAC because the libopus binding is BSD/Apache, modern browsers all
9//! play Opus-in-MP4, and the iOS-13-and-older floor is acceptable.
10//!
11//! Wire model
12//! ----------
//! - [`AudioFrame`] is the canonical PCM exchange type: f32 in
//!   [-1.0, 1.0], interleaved (not planar) layout (LRLRLR for stereo),
//!   with the source sample rate and channel count carried alongside
//!   the samples and a microsecond-domain PTS.
//! - [`EncodedAudioPacket`] carries one encoder output packet plus a
//!   microsecond PTS and a duration in encoder timescale ticks (Opus =
//!   48000 ticks per second, per RFC 7845 §4).
20//! - [`AudioDecoder`] / [`AudioEncoder`] traits are object-safe so
21//!   pipeline code can hand out `Box<dyn AudioEncoder>`.
22//!
23//! Pre-skip + extra_data contract (Opus-specific)
24//! ----------------------------------------------
//! [`AudioEncoder::pre_skip`] returns the number of *48 kHz* samples of
//! lookahead the libopus encoder injects (queried via
//! `OPUS_GET_LOOKAHEAD`, then scaled to 48 kHz ticks when the encoder
//! runs at a different rate). Squad-23's mux side writes this into the
//! `dOps` body so a conformant decoder discards the lookahead at the
//! start of the file.
31//!
//! [`AudioEncoder::extra_data`] returns the `dOps` (OpusSpecificBox)
//! body bytes as laid out by the Opus-in-ISOBMFF encapsulation spec,
//! whose fields mirror the RFC 7845 §5.1 identification header: 11
//! bytes minimum, channel-mapping family 0 (mono/stereo). Multistream
//! (>2 channels) is out of scope for this sprint and is rejected with
//! [`AudioError::Unsupported`].
36
37pub mod decode;
38pub mod encode;
39pub mod resample;
40
41#[derive(thiserror::Error, Debug)]
42pub enum AudioError {
43    #[error("decode failed: {0}")]
44    Decode(String),
45    #[error("encode failed: {0}")]
46    Encode(String),
47    #[error("resample failed: {0}")]
48    Resample(String),
49    #[error("unsupported: {0}")]
50    Unsupported(String),
51}
52
/// One decoded audio frame.
///
/// `samples` is interleaved (not planar): channels alternate sample by
/// sample, so stereo is laid out as `[L0, R0, L1, R1, ...]` and the
/// vector length is `frames * channels`. Values are f32 in
/// `[-1.0, 1.0]`. The encoder side accepts the same layout.
#[derive(Clone, Debug)]
pub struct AudioFrame {
    /// Interleaved samples (LRLRLR for stereo) in `[-1.0, 1.0]`.
    pub samples: Vec<f32>,
    /// Sample rate of `samples`, in Hz.
    pub sample_rate: u32,
    /// Number of channels interleaved in `samples`.
    pub channels: u8,
    /// Presentation timestamp, microseconds, signed (allows negative
    /// pre-roll positions for codecs that emit lookahead frames before
    /// PTS=0 — Opus uses pre_skip rather than negative PTS, but this
    /// keeps the type general).
    pub pts: i64,
}
70
/// A single packet produced by an audio encoder.
///
/// Note the two time fields use different units: `pts` is in
/// microseconds, `duration` is in encoder timescale ticks.
#[derive(Debug, Clone)]
pub struct EncodedAudioPacket {
    /// Encoded payload bytes for this packet.
    pub data: Vec<u8>,
    /// Presentation timestamp in microseconds — the same domain as
    /// `AudioFrame::pts`.
    pub pts: i64,
    /// Packet duration, counted in encoder timescale ticks. Opus runs
    /// at 48000 ticks/sec, so one 20 ms frame is 960 ticks.
    pub duration: i64,
}
81
82#[derive(Clone, Debug)]
83pub struct AudioEncoderConfig {
84    pub codec: AudioCodec,
85    /// Input sample rate the caller will feed [`AudioEncoder::encode`].
86    /// The encoder transparently resamples to its native rate (48 kHz
87    /// for Opus) when this differs.
88    pub sample_rate: u32,
89    pub channels: u8,
90    /// Target bitrate in bits per second.
91    pub bitrate: u32,
92}
93
/// Output codecs the encoder side can produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioCodec {
    /// Opus — currently the only encoder target.
    Opus,
}
98
99pub trait AudioDecoder: Send {
100    /// Decode one input packet at the given PTS (microseconds). May
101    /// return zero or more output frames (zero is normal — some
102    /// decoders need to see two frames before emitting one).
103    fn decode(&mut self, packet: &[u8], pts: i64) -> Result<Vec<AudioFrame>, AudioError>;
104
105    /// Drain any frames buffered inside the decoder. Call once at EOS.
106    fn flush(&mut self) -> Result<Vec<AudioFrame>, AudioError>;
107}
108
109pub trait AudioEncoder: Send {
110    /// Encode one input frame. The encoder buffers up to one output
111    /// frame's worth of samples internally — Opus's smallest frame is
112    /// 2.5 ms, default 20 ms — so this returns 0..N packets.
113    fn encode(&mut self, frame: &AudioFrame) -> Result<Vec<EncodedAudioPacket>, AudioError>;
114
115    /// Drain any buffered samples. May produce a final partial packet.
116    fn flush(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError>;
117
118    /// Lookahead samples at 48 kHz (Opus convention). For Opus,
119    /// queried via `OPUS_GET_LOOKAHEAD` and scaled to 48 kHz when the
120    /// encoder is internally running at a non-48k rate.
121    fn pre_skip(&self) -> u16;
122
123    /// The codec-specific extra_data the muxer puts in the sample
124    /// entry's config box. For Opus this is the `dOps` body per RFC
125    /// 7845 §4.5 (11 bytes for channel-mapping family 0).
126    fn extra_data(&self) -> Vec<u8>;
127}
128
129/// Construct an audio decoder for the given codec name.
130///
131/// `codec` is matched case-insensitively. Supported tokens:
132/// - `mp3` / `mpeg`
133/// - `vorbis` (raw audio packet form — caller is responsible for
134///   feeding the three Xiph setup packets first via the `extra_data`
135///   parameter on first construction, then the audio packets via
136///   `decode`)
137///
138/// `extra_data`, `sample_rate`, and `channels` come from the demux
139/// side's container metadata. For codecs that carry full setup in the
140/// stream (MP3) `extra_data` may be `None`.
141pub fn create_decoder(
142    codec: &str,
143    extra_data: Option<&[u8]>,
144    sample_rate: u32,
145    channels: u8,
146) -> Result<Box<dyn AudioDecoder>, AudioError> {
147    match codec.to_ascii_lowercase().as_str() {
148        "mp3" | "mpeg" | "mp3a" => Ok(Box::new(decode::mp3::Mp3Decoder::new(
149            sample_rate,
150            channels,
151        )?)),
152        "vorbis" => Ok(Box::new(decode::vorbis::VorbisDecoder::new(
153            extra_data,
154            sample_rate,
155            channels,
156        )?)),
157        other => Err(AudioError::Unsupported(format!(
158            "audio decoder for codec {other}"
159        ))),
160    }
161}
162
163/// Construct an audio encoder.
164pub fn create_encoder(config: AudioEncoderConfig) -> Result<Box<dyn AudioEncoder>, AudioError> {
165    match config.codec {
166        AudioCodec::Opus => Ok(Box::new(encode::opus::OpusEncoder::new(config)?)),
167    }
168}