codec/audio/mod.rs
1//! Audio codec framework.
2//!
3//! Squad-24 (2026-04-17 PM5): adds the decoder/encoder traits + the wire
4//! types Squad-23 (audio mux pipeline) consumes. Decoders cover MP3 and
5//! Vorbis (mux already handles AAC/Opus/AC-3 passthrough — no decode
6//! needed for those). The encoder side currently exposes Opus only;
7//! the user decision on the audio expansion in TODO.md picked Opus over
8//! AAC because the libopus binding is BSD/Apache, modern browsers all
9//! play Opus-in-MP4, and the iOS-13-and-older floor is acceptable.
10//!
11//! Wire model
12//! ----------
//! - [`AudioFrame`] is the canonical PCM exchange type: f32 in
//!   [-1.0, 1.0], interleaved (not planar) layout (LRLRLR for stereo),
//!   with the source sample rate and channel count carried alongside
//!   the samples and a microsecond-domain PTS.
//! - [`EncodedAudioPacket`] carries one encoder output packet plus a
//!   microsecond-domain PTS and a duration in encoder timescale
//!   (Opus = 48000 ticks per second per RFC 7845 §4.1).
20//! - [`AudioDecoder`] / [`AudioEncoder`] traits are object-safe so
21//! pipeline code can hand out `Box<dyn AudioEncoder>`.
22//!
23//! Pre-skip + extra_data contract (Opus-specific)
24//! ----------------------------------------------
25//! [`AudioEncoder::pre_skip`] returns the number of *48 kHz* samples of
26//! lookahead the libopus encoder injects (queried via
27//! `OPUS_GET_LOOKAHEAD` and reported in 48 kHz ticks no matter the
28//! configured rate). Squad-23's mux side writes this into the `dOps`
29//! body so a conformant decoder discards the lookahead at the start of
30//! the file.
31//!
32//! [`AudioEncoder::extra_data`] returns the `dOps` body bytes per RFC
33//! 7845 §4.5: 11 bytes minimum, channel-mapping family 0 (mono/stereo).
34//! Multistream (>2 channels) is out of scope for this sprint and
35//! returns [`AudioError::Unsupported`].
36
37pub mod decode;
38pub mod encode;
39pub mod resample;
40
41#[derive(thiserror::Error, Debug)]
42pub enum AudioError {
43 #[error("decode failed: {0}")]
44 Decode(String),
45 #[error("encode failed: {0}")]
46 Encode(String),
47 #[error("resample failed: {0}")]
48 Resample(String),
49 #[error("unsupported: {0}")]
50 Unsupported(String),
51}
52
/// A single block of decoded PCM audio.
///
/// `samples` is interleaved (not planar): for stereo the order is
/// `[L0, R0, L1, R1, ...]` and the length is `frames * channels`.
/// Sample values are f32 in `[-1.0, 1.0]`. The encoder side takes the
/// same layout as input.
#[derive(Debug, Clone)]
pub struct AudioFrame {
    /// Interleaved samples (LRLRLR for stereo), each in `[-1.0, 1.0]`.
    pub samples: Vec<f32>,
    /// Sample rate of the audio held in `samples`.
    pub sample_rate: u32,
    /// Number of channels interleaved in `samples`.
    pub channels: u8,
    /// Presentation timestamp in microseconds. Signed so that pre-roll
    /// positions before PTS=0 can be represented for codecs that emit
    /// lookahead frames; Opus relies on pre_skip instead of negative
    /// PTS, but the signed type keeps this struct codec-agnostic.
    pub pts: i64,
}
70
/// A single compressed packet produced by an encoder.
#[derive(Debug, Clone)]
pub struct EncodedAudioPacket {
    /// The encoded packet bytes.
    pub data: Vec<u8>,
    /// Presentation timestamp in microseconds, i.e. the same domain as
    /// `AudioFrame::pts`.
    pub pts: i64,
    /// Packet duration, counted in encoder timescale ticks. Opus runs
    /// at 48000 ticks/sec, so a 20 ms frame is 960 ticks.
    pub duration: i64,
}
81
82#[derive(Clone, Debug)]
83pub struct AudioEncoderConfig {
84 pub codec: AudioCodec,
85 /// Input sample rate the caller will feed [`AudioEncoder::encode`].
86 /// The encoder transparently resamples to its native rate (48 kHz
87 /// for Opus) when this differs.
88 pub sample_rate: u32,
89 pub channels: u8,
90 /// Target bitrate in bits per second.
91 pub bitrate: u32,
92}
93
/// Codecs available on the encoder side.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioCodec {
    /// Opus — currently the only encoder target.
    Opus,
}
98
99pub trait AudioDecoder: Send {
100 /// Decode one input packet at the given PTS (microseconds). May
101 /// return zero or more output frames (zero is normal — some
102 /// decoders need to see two frames before emitting one).
103 fn decode(&mut self, packet: &[u8], pts: i64) -> Result<Vec<AudioFrame>, AudioError>;
104
105 /// Drain any frames buffered inside the decoder. Call once at EOS.
106 fn flush(&mut self) -> Result<Vec<AudioFrame>, AudioError>;
107}
108
109pub trait AudioEncoder: Send {
110 /// Encode one input frame. The encoder buffers up to one output
111 /// frame's worth of samples internally — Opus's smallest frame is
112 /// 2.5 ms, default 20 ms — so this returns 0..N packets.
113 fn encode(&mut self, frame: &AudioFrame) -> Result<Vec<EncodedAudioPacket>, AudioError>;
114
115 /// Drain any buffered samples. May produce a final partial packet.
116 fn flush(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError>;
117
118 /// Lookahead samples at 48 kHz (Opus convention). For Opus,
119 /// queried via `OPUS_GET_LOOKAHEAD` and scaled to 48 kHz when the
120 /// encoder is internally running at a non-48k rate.
121 fn pre_skip(&self) -> u16;
122
123 /// The codec-specific extra_data the muxer puts in the sample
124 /// entry's config box. For Opus this is the `dOps` body per RFC
125 /// 7845 §4.5 (11 bytes for channel-mapping family 0).
126 fn extra_data(&self) -> Vec<u8>;
127}
128
129/// Construct an audio decoder for the given codec name.
130///
131/// `codec` is matched case-insensitively. Supported tokens:
132/// - `mp3` / `mpeg`
133/// - `vorbis` (raw audio packet form — caller is responsible for
134/// feeding the three Xiph setup packets first via the `extra_data`
135/// parameter on first construction, then the audio packets via
136/// `decode`)
137///
138/// `extra_data`, `sample_rate`, and `channels` come from the demux
139/// side's container metadata. For codecs that carry full setup in the
140/// stream (MP3) `extra_data` may be `None`.
141pub fn create_decoder(
142 codec: &str,
143 extra_data: Option<&[u8]>,
144 sample_rate: u32,
145 channels: u8,
146) -> Result<Box<dyn AudioDecoder>, AudioError> {
147 match codec.to_ascii_lowercase().as_str() {
148 "mp3" | "mpeg" | "mp3a" => Ok(Box::new(decode::mp3::Mp3Decoder::new(
149 sample_rate,
150 channels,
151 )?)),
152 "vorbis" => Ok(Box::new(decode::vorbis::VorbisDecoder::new(
153 extra_data,
154 sample_rate,
155 channels,
156 )?)),
157 other => Err(AudioError::Unsupported(format!(
158 "audio decoder for codec {other}"
159 ))),
160 }
161}
162
163/// Construct an audio encoder.
164pub fn create_encoder(config: AudioEncoderConfig) -> Result<Box<dyn AudioEncoder>, AudioError> {
165 match config.codec {
166 AudioCodec::Opus => Ok(Box::new(encode::opus::OpusEncoder::new(config)?)),
167 }
168}