codec/audio/encode/opus/mod.rs
1//! Opus encoder wrapping `audiopus` (libopus FFI; libopus is BSD,
2//! audiopus is ISC). Squad-23's MP4 mux side consumes the packets +
3//! `extra_data()` (dOps body per RFC 7845 §4.5) + `pre_skip()` (samples
4//! at 48 kHz queried via `OPUS_GET_LOOKAHEAD`).
5//!
6//! Constraints we enforce on the Opus side:
7//! - Native sample rates are 8/12/16/24/48 kHz only. We always run the
8//! internal libopus encoder at 48 kHz and resample the input ourselves
9//! via [`AudioResampler`] when the source isn't 48 k. This keeps
10//! pre_skip semantics simple (always reported in 48 kHz ticks per the
11//! RFC) and means the dOps `InputSampleRate` field cleanly reflects
12//! the original source rate.
13//! - Frame sizes must be 2.5/5/10/20/40/60 ms. We use 20 ms = 960
14//! samples at 48 kHz. This is libopus's default and matches what
15//! browsers / WebRTC expect.
16//! - Channels: 1 (mono) and 2 (stereo) use the regular `audiopus::coder::Encoder`
17//! API. 3..=8 channels (3.0 / quad / 5.0 / 5.1 / 6.1 / 7.1) use the
18//! libopus Multistream API via `audiopus_sys` FFI (Squad-28). Channel
19//! counts above 8 return [`AudioError::Unsupported`] — RFC 7845
20//! §5.1.1.2 only specifies channel-mapping family 1 for 1..=8 channels.
21//!
22//! Defaults
23//! --------
24//! - 96 kbps for stereo, 64 kbps for mono if the caller passes 0.
25//! Multichannel: 64 kbps per uncoupled stream + 96 kbps per coupled
26//! stream (so 5.1 = 96 + 96 + 64 + 64 = 320 kbps total) — well above
27//! transparency for music/speech (Opus reaches transparency around
28//! 64 kbps stereo for music).
29//! - Application = `Audio` (vs Voip / LowDelay): tuned for fidelity
30//! over latency. Latency from a 20 ms frame size + ~6.5 ms libopus
31//! lookahead is ~26 ms one-way which is fine for offline transcode.
32//!
33//! Multistream API (Squad-28)
34//! --------------------------
35//! `audiopus 0.3.0-rc.0` ships a `multistream = []` Cargo feature that's
36//! a stub — it gates no Rust code (the high-level wrapper just doesn't
37//! exist for the multistream side in this crate version). We call the
38//! underlying FFI symbols directly via `audiopus::ffi::*` (which re-exports
39//! `audiopus_sys 0.2.2`'s `opus_multistream_encoder_*` functions). The
40//! channel-mapping family 1 layouts we wire follow RFC 7845 §5.1.1.2
41//! verbatim (3.0 / quad / 5.0 / 5.1 / 6.1 / 7.1).
42
43use audiopus::Application;
44use audiopus::Bitrate;
45use audiopus::Channels as OpusChannels;
46use audiopus::SampleRate;
47use audiopus::coder::Encoder as OpusEncoderInner;
48
49use crate::audio::resample::AudioResampler;
50use crate::audio::{
51 AudioCodec, AudioEncoder, AudioEncoderConfig, AudioError, AudioFrame, EncodedAudioPacket,
52};
53
54mod dops;
55mod multistream;
56
57#[cfg(test)]
58mod tests;
59
60use dops::build_dops;
61use multistream::{MultistreamEncoder, surround_mapping_family_1};
62
/// Samples per channel in one Opus frame: 20 ms at 48 kHz = 960. This
/// is the default/recommended Opus frame size.
const OPUS_FRAME_SAMPLES_48K: usize = 960;
/// Internal encoder rate we always run libopus at. Input at any other
/// rate is resampled to this rate first. Per RFC 7845, pre_skip is
/// always counted in 48 kHz ticks regardless.
const OPUS_INTERNAL_RATE: u32 = 48_000;
/// Size in bytes of the output buffer handed to the regular
/// (mono/stereo) encoder. The per-frame bound from RFC 6716 §3.4 is
/// 1275 bytes; we round up to 4000 as audiopus does, which gives a
/// comfortable margin.
const OPUS_MAX_PACKET_BYTES: usize = 4000;
/// Size in bytes of the output buffer handed to the multistream
/// encoder. The per-frame budget scales with stream count: the
/// standard libopus bound of 1275 bytes per stream, capped at
/// 8 streams = 10200 bytes, rounded up to 16384 for headroom.
const OPUS_MAX_MS_PACKET_BYTES: usize = 16_384;
/// Default bitrate in bits/second for mono, and for each uncoupled
/// stream of a multistream layout, when the caller passes 0.
const DEFAULT_BITRATE_MONO: u32 = 64_000;
/// Default bitrate in bits/second for stereo, and for each coupled
/// stream of a multistream layout, when the caller passes 0.
const DEFAULT_BITRATE_STEREO: u32 = 96_000;
81
/// Internal dispatch between the two libopus encoder flavours. The
/// regular encoder handles 1/2 channels and the multistream encoder
/// handles 3..=8 channels; both are driven through the same
/// [`AudioEncoder`] trait surface on [`OpusEncoder`].
enum OpusInner {
    /// Safe `audiopus` encoder, used for mono and stereo.
    Regular(OpusEncoderInner),
    /// Owned `OpusMSEncoder*` from `opus_multistream_encoder_create`.
    /// Freed via `opus_multistream_encoder_destroy` in `Drop`.
    Multistream(MultistreamEncoder),
}
91
/// Opus encoder that accepts frames at a fixed input rate and channel
/// count, optionally resamples them to 48 kHz, and emits one packet per
/// 20 ms Opus frame.
pub struct OpusEncoder {
    /// Regular or multistream libopus encoder, chosen by channel count.
    inner: OpusInner,
    /// Source sample rate the caller will feed; frames at any other
    /// rate are rejected by `encode`.
    in_rate: u32,
    /// Channel count (1..=8).
    channels: u8,
    /// Resampler when in_rate != 48 kHz, else None.
    resampler: Option<AudioResampler>,
    /// Carry of resampled (or directly-fed) samples that didn't fill a
    /// full Opus frame yet. Interleaved f32, sliced into frames of
    /// `OPUS_FRAME_SAMPLES_48K * channels` values.
    sample_carry: Vec<f32>,
    /// pre_skip in 48 kHz samples — captured at construction from the
    /// encoder lookahead, clamped to `u16::MAX` if it does not fit.
    pre_skip_48k: u16,
    /// dOps body bytes per RFC 7845 §4.5 — built once at construction
    /// from in_rate + channels + pre_skip + (when multichannel)
    /// streams + coupled_streams + channel mapping.
    extra_data: Vec<u8>,
    /// Running PTS in microseconds. Set from the first frame passed to
    /// `encode`; treated as 0 if a packet is produced before that.
    next_pts_us: Option<i64>,
    /// Microseconds per Opus frame at the configured frame size.
    frame_duration_us: i64,
    /// Reusable encode output buffer to avoid per-frame allocation.
    encode_out: Vec<u8>,
}
116
117impl OpusEncoder {
118 pub fn new(config: AudioEncoderConfig) -> Result<Self, AudioError> {
119 if config.codec != AudioCodec::Opus {
120 return Err(AudioError::Encode(format!(
121 "OpusEncoder constructed with codec {:?}",
122 config.codec
123 )));
124 }
125 if config.channels == 0 {
126 return Err(AudioError::Unsupported(
127 "Opus channel count must be >= 1".to_string(),
128 ));
129 }
130 if config.channels > 8 {
131 return Err(AudioError::Unsupported(format!(
132 "Opus supports up to 8 channels (channel-mapping family 1, RFC 7845 §5.1.1.2); \
133 got {} channels",
134 config.channels
135 )));
136 }
137 if config.sample_rate == 0 {
138 return Err(AudioError::Encode("input sample_rate is 0".to_string()));
139 }
140
141 let channels = config.channels;
142
143 // Construct the inner encoder + capture multistream metadata
144 // (streams / coupled_streams / mapping) when on the multistream
145 // path. Both paths converge into a single OpusInner.
146 let (inner, ms_meta, max_packet_bytes) = if channels <= 2 {
147 // Regular API path — Squad-24's original code.
148 let opus_channels = match channels {
149 1 => OpusChannels::Mono,
150 2 => OpusChannels::Stereo,
151 _ => unreachable!("channel-count guarded above"),
152 };
153 let mut enc =
154 OpusEncoderInner::new(SampleRate::Hz48000, opus_channels, Application::Audio)
155 .map_err(|e| AudioError::Encode(format!("opus encoder create: {e}")))?;
156 let bitrate_bps = if config.bitrate == 0 {
157 if channels == 1 {
158 DEFAULT_BITRATE_MONO
159 } else {
160 DEFAULT_BITRATE_STEREO
161 }
162 } else {
163 config.bitrate
164 };
165 enc.set_bitrate(Bitrate::BitsPerSecond(bitrate_bps as i32))
166 .map_err(|e| AudioError::Encode(format!("opus set_bitrate: {e}")))?;
167 // VBR is the audiopus default but we set it explicitly for
168 // documentation; CBR is reserved for streaming use cases not
169 // relevant to file output.
170 enc.set_vbr(true)
171 .map_err(|e| AudioError::Encode(format!("opus set_vbr: {e}")))?;
172 (OpusInner::Regular(enc), None, OPUS_MAX_PACKET_BYTES)
173 } else {
174 // Multistream path: build the family-1 layout, allocate the
175 // libopus multistream encoder via FFI.
176 let (streams, coupled, mapping) = surround_mapping_family_1(channels)?;
177 let mut ms = MultistreamEncoder::new(
178 OPUS_INTERNAL_RATE,
179 channels,
180 streams,
181 coupled,
182 mapping,
183 Application::Audio,
184 )?;
185 // Default aggregate bitrate scales with streams: 96 kbps per
186 // coupled (stereo) + 64 kbps per uncoupled (mono). For 5.1
187 // (4 streams, 2 coupled) this is 2*96 + 2*64 = 320 kbps,
188 // which is the Opus reference default for surround.
189 let bitrate_bps = if config.bitrate == 0 {
190 let coupled_u = coupled as u32;
191 let mono_u = streams as u32 - coupled_u;
192 coupled_u * DEFAULT_BITRATE_STEREO + mono_u * DEFAULT_BITRATE_MONO
193 } else {
194 config.bitrate
195 };
196 ms.set_bitrate(bitrate_bps as i32)?;
197 ms.set_vbr(true)?;
198 (
199 OpusInner::Multistream(ms),
200 Some((streams, coupled, mapping)),
201 OPUS_MAX_MS_PACKET_BYTES,
202 )
203 };
204
205 // Read the lookahead in 48 kHz ticks regardless of which inner
206 // path we took. Both regular + multistream report lookahead in
207 // samples-of-the-configured-rate per libopus convention; we
208 // configure both at 48 kHz so no scaling is needed.
209 let pre_skip_48k_u32 = match &inner {
210 OpusInner::Regular(enc) => enc
211 .lookahead()
212 .map_err(|e| AudioError::Encode(format!("opus lookahead: {e}")))?,
213 OpusInner::Multistream(ms) => ms.lookahead()?,
214 };
215 let pre_skip_48k: u16 = pre_skip_48k_u32.try_into().unwrap_or(u16::MAX);
216
217 // Resampler if needed.
218 let resampler = if config.sample_rate == OPUS_INTERNAL_RATE {
219 None
220 } else {
221 // chunk_size: process 20 ms worth of input at a time so the
222 // resampler output naturally aligns with Opus's 20 ms frame
223 // size. 20 ms at 44.1 kHz = 882 samples, at 22.05 kHz = 441,
224 // etc. We round to the nearest integer.
225 let chunk = ((config.sample_rate as usize) * 20) / 1000;
226 let chunk = chunk.max(1);
227 Some(AudioResampler::new(
228 config.sample_rate,
229 OPUS_INTERNAL_RATE,
230 channels,
231 chunk,
232 )?)
233 };
234
235 let extra_data = build_dops(channels, pre_skip_48k, config.sample_rate, ms_meta);
236
237 let frame_duration_us =
238 (OPUS_FRAME_SAMPLES_48K as i64 * 1_000_000) / OPUS_INTERNAL_RATE as i64;
239
240 Ok(Self {
241 inner,
242 in_rate: config.sample_rate,
243 channels,
244 resampler,
245 sample_carry: Vec::with_capacity(OPUS_FRAME_SAMPLES_48K * channels as usize * 4),
246 pre_skip_48k,
247 extra_data,
248 next_pts_us: None,
249 frame_duration_us,
250 encode_out: vec![0u8; max_packet_bytes],
251 })
252 }
253
254 /// Drain as many full 20-ms Opus frames as possible from
255 /// `sample_carry`. Each successful encode advances `next_pts_us`
256 /// by `frame_duration_us`.
257 fn drain_packets(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError> {
258 let mut out = Vec::new();
259 let chans = self.channels as usize;
260 let frame_interleaved_len = OPUS_FRAME_SAMPLES_48K * chans;
261 while self.sample_carry.len() >= frame_interleaved_len {
262 // Encode the front-most frame.
263 let frame_slice = &self.sample_carry[..frame_interleaved_len];
264 let n = match &mut self.inner {
265 OpusInner::Regular(enc) => enc
266 .encode_float(frame_slice, &mut self.encode_out)
267 .map_err(|e| AudioError::Encode(format!("opus encode_float: {e}")))?,
268 OpusInner::Multistream(ms) => {
269 ms.encode_float(frame_slice, OPUS_FRAME_SAMPLES_48K, &mut self.encode_out)?
270 }
271 };
272 // n=0 would be a discontinuous-transmission "no packet"
273 // signal — we don't enable DTX so it shouldn't fire, but
274 // defensively skip if it does.
275 if n > 0 {
276 let pts = self.next_pts_us.unwrap_or(0);
277 self.next_pts_us = Some(pts + self.frame_duration_us);
278 out.push(EncodedAudioPacket {
279 data: self.encode_out[..n].to_vec(),
280 pts,
281 duration: OPUS_FRAME_SAMPLES_48K as i64, // 48 kHz ticks
282 });
283 }
284 self.sample_carry.drain(..frame_interleaved_len);
285 }
286 Ok(out)
287 }
288}
289
290impl AudioEncoder for OpusEncoder {
291 fn encode(&mut self, frame: &AudioFrame) -> Result<Vec<EncodedAudioPacket>, AudioError> {
292 // Channel-count gate. Multichannel (3..=8) is now supported via
293 // the Multistream API path (Squad-28); >8 stays Unsupported.
294 if frame.channels == 0 || frame.channels > 8 {
295 return Err(AudioError::Unsupported(format!(
296 "Opus AudioFrame channel count must be 1..=8; got {}",
297 frame.channels
298 )));
299 }
300 if frame.channels != self.channels {
301 return Err(AudioError::Encode(format!(
302 "channel count mismatch: encoder configured for {}, frame has {}",
303 self.channels, frame.channels
304 )));
305 }
306 if frame.sample_rate != self.in_rate {
307 return Err(AudioError::Encode(format!(
308 "sample rate mismatch: encoder configured for {}, frame has {}",
309 self.in_rate, frame.sample_rate
310 )));
311 }
312
313 if self.next_pts_us.is_none() {
314 self.next_pts_us = Some(frame.pts);
315 }
316
317 // Push samples into carry, possibly via resampler.
318 if let Some(r) = self.resampler.as_mut() {
319 r.process(frame, &mut self.sample_carry)?;
320 } else {
321 self.sample_carry.extend_from_slice(&frame.samples);
322 }
323
324 self.drain_packets()
325 }
326
327 fn flush(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError> {
328 if let Some(r) = self.resampler.as_mut() {
329 r.flush(&mut self.sample_carry)?;
330 }
331 // Pad the final partial frame with silence so libopus can emit
332 // a final packet (mux side will use pre_skip + the file's
333 // total sample count to know where playable audio ends).
334 let chans = self.channels as usize;
335 let frame_interleaved_len = OPUS_FRAME_SAMPLES_48K * chans;
336 if !self.sample_carry.is_empty() && self.sample_carry.len() < frame_interleaved_len {
337 self.sample_carry.resize(frame_interleaved_len, 0.0);
338 }
339 self.drain_packets()
340 }
341
342 fn pre_skip(&self) -> u16 {
343 self.pre_skip_48k
344 }
345
346 fn extra_data(&self) -> Vec<u8> {
347 self.extra_data.clone()
348 }
349}