Skip to main content

codec/audio/encode/opus/
mod.rs

1//! Opus encoder wrapping `audiopus` (libopus FFI; libopus is BSD,
2//! audiopus is ISC). Squad-23's MP4 mux side consumes the packets +
3//! `extra_data()` (dOps body per RFC 7845 §4.5) + `pre_skip()` (samples
4//! at 48 kHz queried via `OPUS_GET_LOOKAHEAD`).
5//!
6//! Constraints we enforce on the Opus side:
7//! - Native sample rates are 8/12/16/24/48 kHz only. We always run the
8//!   internal libopus encoder at 48 kHz and resample the input ourselves
9//!   via [`AudioResampler`] when the source isn't 48 k. This keeps
10//!   pre_skip semantics simple (always reported in 48 kHz ticks per the
11//!   RFC) and means the dOps `InputSampleRate` field cleanly reflects
12//!   the original source rate.
13//! - Frame sizes must be 2.5/5/10/20/40/60 ms. We use 20 ms = 960
14//!   samples at 48 kHz. This is libopus's default and matches what
15//!   browsers / WebRTC expect.
16//! - Channels: 1 (mono) and 2 (stereo) use the regular `audiopus::coder::Encoder`
17//!   API. 3..=8 channels (3.0 / quad / 5.0 / 5.1 / 6.1 / 7.1) use the
18//!   libopus Multistream API via `audiopus_sys` FFI (Squad-28). Channel
19//!   counts above 8 return [`AudioError::Unsupported`] — RFC 7845
20//!   §5.1.1.2 only specifies channel-mapping family 1 for 1..=8 channels.
21//!
22//! Defaults
23//! --------
24//! - 96 kbps for stereo, 64 kbps for mono if the caller passes 0.
25//!   Multichannel: 64 kbps per uncoupled stream + 96 kbps per coupled
26//!   stream (so 5.1 = 96 + 96 + 64 + 64 = 320 kbps total) — well above
27//!   transparency for music/speech (Opus reaches transparency around
28//!   64 kbps stereo for music).
29//! - Application = `Audio` (vs Voip / LowDelay): tuned for fidelity
30//!   over latency. Latency from a 20 ms frame size + ~6.5 ms libopus
31//!   lookahead is ~26 ms one-way which is fine for offline transcode.
32//!
33//! Multistream API (Squad-28)
34//! --------------------------
35//! `audiopus 0.3.0-rc.0` ships a `multistream = []` Cargo feature that's
36//! a stub — it gates no Rust code (the high-level wrapper just doesn't
37//! exist for the multistream side in this crate version). We call the
38//! underlying FFI symbols directly via `audiopus::ffi::*` (which re-exports
39//! `audiopus_sys 0.2.2`'s `opus_multistream_encoder_*` functions). The
40//! channel-mapping family 1 layouts we wire follow RFC 7845 §5.1.1.2
41//! verbatim (3.0 / quad / 5.0 / 5.1 / 6.1 / 7.1).
42
43use audiopus::Application;
44use audiopus::Bitrate;
45use audiopus::Channels as OpusChannels;
46use audiopus::SampleRate;
47use audiopus::coder::Encoder as OpusEncoderInner;
48
49use crate::audio::resample::AudioResampler;
50use crate::audio::{
51    AudioCodec, AudioEncoder, AudioEncoderConfig, AudioError, AudioFrame, EncodedAudioPacket,
52};
53
54mod dops;
55mod multistream;
56
57#[cfg(test)]
58mod tests;
59
60use dops::build_dops;
61use multistream::{MultistreamEncoder, surround_mapping_family_1};
62
/// Internal encoder rate we always run libopus at. Input at any other
/// rate is resampled to this one. Per RFC 7845, pre_skip is always
/// counted in 48 kHz ticks regardless of the source rate.
const OPUS_INTERNAL_RATE: u32 = 48_000;
/// Samples per channel in one 20 ms frame at the internal 48 kHz rate
/// (48 samples per millisecond * 20 ms = 960). This is the
/// default/recommended Opus frame size.
const OPUS_FRAME_SAMPLES_48K: usize = (OPUS_INTERNAL_RATE as usize / 1_000) * 20;
/// Output buffer size for the regular (mono/stereo) path. The actual
/// bound per RFC 6716 §3.4 is 1275 bytes for 60 ms VBR; we round up to
/// 4000 as audiopus does, which gives a comfortable margin.
const OPUS_MAX_PACKET_BYTES: usize = 4_000;
/// Output buffer size for the multistream path. The per-frame budget
/// scales with stream count: 1275 bytes per stream capped at 8 streams
/// is 10200 bytes, rounded up to 16 KiB for headroom.
const OPUS_MAX_MS_PACKET_BYTES: usize = 16 * 1_024;
/// Default bitrate in bits/second for a mono (uncoupled) stream.
const DEFAULT_BITRATE_MONO: u32 = 64 * 1_000;
/// Default bitrate in bits/second for a stereo (coupled) stream.
const DEFAULT_BITRATE_STEREO: u32 = 96 * 1_000;
81
/// Internal dispatch between the two libopus encoder flavours. `new`
/// picks `Regular` for 1 or 2 channels and `Multistream` for 3..=8
/// channels; the two paths converge at the [`AudioEncoder`] trait
/// surface.
enum OpusInner {
    /// High-level `audiopus` encoder, used for mono and stereo.
    Regular(OpusEncoderInner),
    /// Owned `OpusMSEncoder*` from `opus_multistream_encoder_create`.
    /// Freed via `opus_multistream_encoder_destroy` in `Drop`.
    // NOTE(review): the create/destroy pairing lives in the
    // `multistream` module, which is not visible here — confirm there.
    Multistream(MultistreamEncoder),
}
91
/// Opus encoder that accepts frames at the configured source rate and
/// channel count, buffers them (resampling to 48 kHz when needed) and
/// emits one packet per full 20 ms frame.
pub struct OpusEncoder {
    /// Regular or multistream libopus encoder, chosen by channel count.
    inner: OpusInner,
    /// Source sample rate the caller will feed.
    in_rate: u32,
    /// Channel count (1..=8).
    channels: u8,
    /// Resampler when in_rate != 48 kHz, else None.
    resampler: Option<AudioResampler>,
    /// Carry of resampled (or directly-fed) samples that didn't fill a
    /// full Opus frame yet. Interleaved f32: one frame occupies
    /// `OPUS_FRAME_SAMPLES_48K * channels` consecutive values.
    sample_carry: Vec<f32>,
    /// pre_skip in 48 kHz samples — captured at construction.
    pre_skip_48k: u16,
    /// dOps body bytes per RFC 7845 §4.5 — built once at construction
    /// from in_rate + channels + pre_skip + (when multichannel)
    /// streams + coupled_streams + channel mapping.
    extra_data: Vec<u8>,
    /// Running PTS in microseconds. Set on first encode call.
    next_pts_us: Option<i64>,
    /// Microseconds per Opus frame at the configured frame size.
    frame_duration_us: i64,
    /// Reusable encode output buffer to avoid per-frame allocation.
    /// Sized once at construction to the packet bound of the chosen
    /// encoder path.
    encode_out: Vec<u8>,
}
116
117impl OpusEncoder {
118    pub fn new(config: AudioEncoderConfig) -> Result<Self, AudioError> {
119        if config.codec != AudioCodec::Opus {
120            return Err(AudioError::Encode(format!(
121                "OpusEncoder constructed with codec {:?}",
122                config.codec
123            )));
124        }
125        if config.channels == 0 {
126            return Err(AudioError::Unsupported(
127                "Opus channel count must be >= 1".to_string(),
128            ));
129        }
130        if config.channels > 8 {
131            return Err(AudioError::Unsupported(format!(
132                "Opus supports up to 8 channels (channel-mapping family 1, RFC 7845 §5.1.1.2); \
133                 got {} channels",
134                config.channels
135            )));
136        }
137        if config.sample_rate == 0 {
138            return Err(AudioError::Encode("input sample_rate is 0".to_string()));
139        }
140
141        let channels = config.channels;
142
143        // Construct the inner encoder + capture multistream metadata
144        // (streams / coupled_streams / mapping) when on the multistream
145        // path. Both paths converge into a single OpusInner.
146        let (inner, ms_meta, max_packet_bytes) = if channels <= 2 {
147            // Regular API path — Squad-24's original code.
148            let opus_channels = match channels {
149                1 => OpusChannels::Mono,
150                2 => OpusChannels::Stereo,
151                _ => unreachable!("channel-count guarded above"),
152            };
153            let mut enc =
154                OpusEncoderInner::new(SampleRate::Hz48000, opus_channels, Application::Audio)
155                    .map_err(|e| AudioError::Encode(format!("opus encoder create: {e}")))?;
156            let bitrate_bps = if config.bitrate == 0 {
157                if channels == 1 {
158                    DEFAULT_BITRATE_MONO
159                } else {
160                    DEFAULT_BITRATE_STEREO
161                }
162            } else {
163                config.bitrate
164            };
165            enc.set_bitrate(Bitrate::BitsPerSecond(bitrate_bps as i32))
166                .map_err(|e| AudioError::Encode(format!("opus set_bitrate: {e}")))?;
167            // VBR is the audiopus default but we set it explicitly for
168            // documentation; CBR is reserved for streaming use cases not
169            // relevant to file output.
170            enc.set_vbr(true)
171                .map_err(|e| AudioError::Encode(format!("opus set_vbr: {e}")))?;
172            (OpusInner::Regular(enc), None, OPUS_MAX_PACKET_BYTES)
173        } else {
174            // Multistream path: build the family-1 layout, allocate the
175            // libopus multistream encoder via FFI.
176            let (streams, coupled, mapping) = surround_mapping_family_1(channels)?;
177            let mut ms = MultistreamEncoder::new(
178                OPUS_INTERNAL_RATE,
179                channels,
180                streams,
181                coupled,
182                mapping,
183                Application::Audio,
184            )?;
185            // Default aggregate bitrate scales with streams: 96 kbps per
186            // coupled (stereo) + 64 kbps per uncoupled (mono). For 5.1
187            // (4 streams, 2 coupled) this is 2*96 + 2*64 = 320 kbps,
188            // which is the Opus reference default for surround.
189            let bitrate_bps = if config.bitrate == 0 {
190                let coupled_u = coupled as u32;
191                let mono_u = streams as u32 - coupled_u;
192                coupled_u * DEFAULT_BITRATE_STEREO + mono_u * DEFAULT_BITRATE_MONO
193            } else {
194                config.bitrate
195            };
196            ms.set_bitrate(bitrate_bps as i32)?;
197            ms.set_vbr(true)?;
198            (
199                OpusInner::Multistream(ms),
200                Some((streams, coupled, mapping)),
201                OPUS_MAX_MS_PACKET_BYTES,
202            )
203        };
204
205        // Read the lookahead in 48 kHz ticks regardless of which inner
206        // path we took. Both regular + multistream report lookahead in
207        // samples-of-the-configured-rate per libopus convention; we
208        // configure both at 48 kHz so no scaling is needed.
209        let pre_skip_48k_u32 = match &inner {
210            OpusInner::Regular(enc) => enc
211                .lookahead()
212                .map_err(|e| AudioError::Encode(format!("opus lookahead: {e}")))?,
213            OpusInner::Multistream(ms) => ms.lookahead()?,
214        };
215        let pre_skip_48k: u16 = pre_skip_48k_u32.try_into().unwrap_or(u16::MAX);
216
217        // Resampler if needed.
218        let resampler = if config.sample_rate == OPUS_INTERNAL_RATE {
219            None
220        } else {
221            // chunk_size: process 20 ms worth of input at a time so the
222            // resampler output naturally aligns with Opus's 20 ms frame
223            // size. 20 ms at 44.1 kHz = 882 samples, at 22.05 kHz = 441,
224            // etc. We round to the nearest integer.
225            let chunk = ((config.sample_rate as usize) * 20) / 1000;
226            let chunk = chunk.max(1);
227            Some(AudioResampler::new(
228                config.sample_rate,
229                OPUS_INTERNAL_RATE,
230                channels,
231                chunk,
232            )?)
233        };
234
235        let extra_data = build_dops(channels, pre_skip_48k, config.sample_rate, ms_meta);
236
237        let frame_duration_us =
238            (OPUS_FRAME_SAMPLES_48K as i64 * 1_000_000) / OPUS_INTERNAL_RATE as i64;
239
240        Ok(Self {
241            inner,
242            in_rate: config.sample_rate,
243            channels,
244            resampler,
245            sample_carry: Vec::with_capacity(OPUS_FRAME_SAMPLES_48K * channels as usize * 4),
246            pre_skip_48k,
247            extra_data,
248            next_pts_us: None,
249            frame_duration_us,
250            encode_out: vec![0u8; max_packet_bytes],
251        })
252    }
253
254    /// Drain as many full 20-ms Opus frames as possible from
255    /// `sample_carry`. Each successful encode advances `next_pts_us`
256    /// by `frame_duration_us`.
257    fn drain_packets(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError> {
258        let mut out = Vec::new();
259        let chans = self.channels as usize;
260        let frame_interleaved_len = OPUS_FRAME_SAMPLES_48K * chans;
261        while self.sample_carry.len() >= frame_interleaved_len {
262            // Encode the front-most frame.
263            let frame_slice = &self.sample_carry[..frame_interleaved_len];
264            let n = match &mut self.inner {
265                OpusInner::Regular(enc) => enc
266                    .encode_float(frame_slice, &mut self.encode_out)
267                    .map_err(|e| AudioError::Encode(format!("opus encode_float: {e}")))?,
268                OpusInner::Multistream(ms) => {
269                    ms.encode_float(frame_slice, OPUS_FRAME_SAMPLES_48K, &mut self.encode_out)?
270                }
271            };
272            // n=0 would be a discontinuous-transmission "no packet"
273            // signal — we don't enable DTX so it shouldn't fire, but
274            // defensively skip if it does.
275            if n > 0 {
276                let pts = self.next_pts_us.unwrap_or(0);
277                self.next_pts_us = Some(pts + self.frame_duration_us);
278                out.push(EncodedAudioPacket {
279                    data: self.encode_out[..n].to_vec(),
280                    pts,
281                    duration: OPUS_FRAME_SAMPLES_48K as i64, // 48 kHz ticks
282                });
283            }
284            self.sample_carry.drain(..frame_interleaved_len);
285        }
286        Ok(out)
287    }
288}
289
290impl AudioEncoder for OpusEncoder {
291    fn encode(&mut self, frame: &AudioFrame) -> Result<Vec<EncodedAudioPacket>, AudioError> {
292        // Channel-count gate. Multichannel (3..=8) is now supported via
293        // the Multistream API path (Squad-28); >8 stays Unsupported.
294        if frame.channels == 0 || frame.channels > 8 {
295            return Err(AudioError::Unsupported(format!(
296                "Opus AudioFrame channel count must be 1..=8; got {}",
297                frame.channels
298            )));
299        }
300        if frame.channels != self.channels {
301            return Err(AudioError::Encode(format!(
302                "channel count mismatch: encoder configured for {}, frame has {}",
303                self.channels, frame.channels
304            )));
305        }
306        if frame.sample_rate != self.in_rate {
307            return Err(AudioError::Encode(format!(
308                "sample rate mismatch: encoder configured for {}, frame has {}",
309                self.in_rate, frame.sample_rate
310            )));
311        }
312
313        if self.next_pts_us.is_none() {
314            self.next_pts_us = Some(frame.pts);
315        }
316
317        // Push samples into carry, possibly via resampler.
318        if let Some(r) = self.resampler.as_mut() {
319            r.process(frame, &mut self.sample_carry)?;
320        } else {
321            self.sample_carry.extend_from_slice(&frame.samples);
322        }
323
324        self.drain_packets()
325    }
326
327    fn flush(&mut self) -> Result<Vec<EncodedAudioPacket>, AudioError> {
328        if let Some(r) = self.resampler.as_mut() {
329            r.flush(&mut self.sample_carry)?;
330        }
331        // Pad the final partial frame with silence so libopus can emit
332        // a final packet (mux side will use pre_skip + the file's
333        // total sample count to know where playable audio ends).
334        let chans = self.channels as usize;
335        let frame_interleaved_len = OPUS_FRAME_SAMPLES_48K * chans;
336        if !self.sample_carry.is_empty() && self.sample_carry.len() < frame_interleaved_len {
337            self.sample_carry.resize(frame_interleaved_len, 0.0);
338        }
339        self.drain_packets()
340    }
341
342    fn pre_skip(&self) -> u16 {
343        self.pre_skip_48k
344    }
345
346    fn extra_data(&self) -> Vec<u8> {
347        self.extra_data.clone()
348    }
349}