Skip to main content

codec/audio/decode/
vorbis.rs

1//! Vorbis decoder wrapping `lewton` (pure-Rust, MIT/Apache-2.0).
2//!
3//! Vorbis storage in MKV / WebM doesn't include the OGG container —
4//! MKV's `CodecPrivate` carries the three Xiph setup headers (ident,
5//! comment, setup) packed in the Xiph "lacing" layout, then each
6//! `Block` holds one raw audio packet. Squad-23's audio mux side will
7//! receive Vorbis sources from MKV demux this way; we accept the
8//! packed CodecPrivate as `extra_data` on construction and use lewton's
9//! lower-level packet API.
10//!
//! For OGG-Vorbis files (`.ogg` / `.oga`) the MP4 mux side hands us
//! whole audio packets the OGG demuxer split out — same code path,
//! since lewton's per-packet API does not depend on the container
//! (the only state carried between packets is the overlap window).
14//!
//! Xiph lacing layout (used by MKV CodecPrivate for Vorbis and Theora,
//! and anywhere else that needs to pack 3 variable-length blobs into a
//! flat byte buffer):
//! - byte 0: number of headers minus one (so byte 0 = 2 for Vorbis)
//! - then one lacing value per header for headers 0..=N-2 (each value
//!   is a run of zero or more 0xFF bytes terminated by a non-0xFF byte;
//!   the header length is the sum of all bytes in the run, terminator
//!   included — e.g. `0xFF 0x05` encodes 260)
//! - then the headers in order (header 0, 1, ..., N-1). The last
//!   header's length is computed from the total CodecPrivate length
//!   minus the lacing prefix and the sum of explicit lacing lengths.
24
25use lewton::audio::{PreviousWindowRight, read_audio_packet_generic};
26use lewton::header::{
27    HeaderReadError, IdentHeader, SetupHeader, read_header_ident, read_header_setup,
28};
29
30use crate::audio::{AudioDecoder, AudioError, AudioFrame};
31
32pub struct VorbisDecoder {
33    ident: IdentHeader,
34    setup: SetupHeader,
35    pwr: PreviousWindowRight,
36    declared_sample_rate: u32,
37    /// Caller-declared channel count (defaults to ident header value
38    /// when the container reports 0). Held for cross-check; current
39    /// path uses the per-packet decoded channel count from lewton.
40    #[allow(dead_code)]
41    declared_channels: u8,
42    /// Running PTS in microseconds. Set on first `decode` call.
43    next_pts_us: Option<i64>,
44}
45
46impl VorbisDecoder {
47    /// Construct a decoder. `extra_data` MUST be the Xiph-laced
48    /// concatenation of the three Vorbis setup packets (ident +
49    /// comment + setup), as MKV CodecPrivate carries it. For OGG
50    /// sources, the demuxer is responsible for assembling the same
51    /// layout from the first three packets before calling here.
52    pub fn new(
53        extra_data: Option<&[u8]>,
54        sample_rate: u32,
55        channels: u8,
56    ) -> Result<Self, AudioError> {
57        let extra = extra_data.ok_or_else(|| {
58            AudioError::Decode(
59                "vorbis decoder needs CodecPrivate-style setup headers as extra_data".to_string(),
60            )
61        })?;
62        let (ident_bytes, _comment_bytes, setup_bytes) = parse_xiph_lacing(extra)?;
63
64        let ident = read_header_ident(ident_bytes)
65            .map_err(|e| AudioError::Decode(format!("vorbis ident header: {}", header_err(&e))))?;
66
67        // Cross-check container claims against the bitstream's own.
68        // The container side may report 0 if the demuxer didn't have
69        // Audio metadata; we tolerate that and trust the ident header.
70        let cs = if sample_rate == 0 {
71            ident.audio_sample_rate
72        } else {
73            sample_rate
74        };
75        let cc = if channels == 0 {
76            ident.audio_channels
77        } else {
78            channels
79        };
80
81        if ident.audio_channels == 0 || ident.audio_channels > 2 {
82            return Err(AudioError::Unsupported(format!(
83                "vorbis channel count {} (this decoder routes >2 channels through resampler/encoder which only supports mono/stereo)",
84                ident.audio_channels
85            )));
86        }
87
88        let setup = read_header_setup(
89            setup_bytes,
90            ident.audio_channels,
91            (ident.blocksize_0, ident.blocksize_1),
92        )
93        .map_err(|e| AudioError::Decode(format!("vorbis setup header: {}", header_err(&e))))?;
94
95        Ok(Self {
96            ident,
97            setup,
98            pwr: PreviousWindowRight::new(),
99            declared_sample_rate: cs,
100            declared_channels: cc,
101            next_pts_us: None,
102        })
103    }
104}
105
106impl AudioDecoder for VorbisDecoder {
107    fn decode(&mut self, packet: &[u8], pts: i64) -> Result<Vec<AudioFrame>, AudioError> {
108        if self.next_pts_us.is_none() {
109            self.next_pts_us = Some(pts);
110        }
111        if packet.is_empty() {
112            return Ok(Vec::new());
113        }
114
115        // Vorbis returns Vec<Vec<f32>> per channel (planar). We flatten
116        // to interleaved planar to match AudioFrame's contract.
117        let decoded: Vec<Vec<f32>> = read_audio_packet_generic::<Vec<Vec<f32>>>(
118            &self.ident,
119            &self.setup,
120            packet,
121            &mut self.pwr,
122        )
123        .map_err(|e| AudioError::Decode(format!("vorbis audio packet: {e:?}")))?;
124
125        if decoded.is_empty() {
126            return Ok(Vec::new());
127        }
128        let channels = decoded.len() as u8;
129        if channels == 0 {
130            return Ok(Vec::new());
131        }
132        let frames_per_channel = decoded[0].len();
133        if frames_per_channel == 0 {
134            return Ok(Vec::new());
135        }
136
137        let mut interleaved = Vec::with_capacity(frames_per_channel * channels as usize);
138        for i in 0..frames_per_channel {
139            for ch in 0..channels as usize {
140                let s = decoded[ch][i];
141                // lewton already produces f32 in [-1, 1]; clamp
142                // defensively to match AudioFrame's contract.
143                interleaved.push(s.clamp(-1.0, 1.0));
144            }
145        }
146
147        let pts_us = self.next_pts_us.unwrap_or(pts);
148        let frame_us = (frames_per_channel as i64 * 1_000_000) / self.declared_sample_rate as i64;
149        self.next_pts_us = Some(pts_us + frame_us);
150
151        Ok(vec![AudioFrame {
152            samples: interleaved,
153            sample_rate: self.declared_sample_rate,
154            channels,
155            pts: pts_us,
156        }])
157    }
158
159    fn flush(&mut self) -> Result<Vec<AudioFrame>, AudioError> {
160        // Vorbis is stateless after each packet flush — there's no
161        // tail buffer to drain. PreviousWindowRight only matters for
162        // the next packet's IMDCT overlap-add.
163        Ok(Vec::new())
164    }
165}
166
167/// Parse a 3-element Xiph lacing buffer. Used by MKV CodecPrivate for
168/// Vorbis (and FLAC). Returns the three header byte slices.
169fn parse_xiph_lacing(bytes: &[u8]) -> Result<(&[u8], &[u8], &[u8]), AudioError> {
170    if bytes.is_empty() {
171        return Err(AudioError::Decode("vorbis extra_data is empty".to_string()));
172    }
173    let n_minus_1 = bytes[0] as usize;
174    if n_minus_1 != 2 {
175        return Err(AudioError::Decode(format!(
176            "vorbis extra_data lacing prefix says n-1={n_minus_1}, expected 2 (3 headers)"
177        )));
178    }
179    // Read lacing values for headers 0..N-2 (so for N=3, 2 values).
180    let mut cursor = 1usize;
181    let mut lengths = [0usize; 2];
182    for slot in lengths.iter_mut() {
183        let mut total = 0usize;
184        loop {
185            if cursor >= bytes.len() {
186                return Err(AudioError::Decode(
187                    "vorbis extra_data ended inside Xiph lacing length".to_string(),
188                ));
189            }
190            let v = bytes[cursor] as usize;
191            cursor += 1;
192            total += v;
193            if v != 0xFF {
194                break;
195            }
196        }
197        *slot = total;
198    }
199    let len0 = lengths[0];
200    let len1 = lengths[1];
201    let header_bytes_start = cursor;
202    if header_bytes_start + len0 + len1 > bytes.len() {
203        return Err(AudioError::Decode(format!(
204            "vorbis extra_data: lacing lengths {} + {} + tail exceed buffer ({} bytes after prefix, total {})",
205            len0,
206            len1,
207            bytes.len() - header_bytes_start,
208            bytes.len()
209        )));
210    }
211    let len2 = bytes.len() - header_bytes_start - len0 - len1;
212    let h0 = &bytes[header_bytes_start..header_bytes_start + len0];
213    let h1 = &bytes[header_bytes_start + len0..header_bytes_start + len0 + len1];
214    let h2 = &bytes[header_bytes_start + len0 + len1..header_bytes_start + len0 + len1 + len2];
215    Ok((h0, h1, h2))
216}
217
218fn header_err(e: &HeaderReadError) -> String {
219    format!("{e:?}")
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a test buffer: the raw lacing `prefix` followed by one run of
    /// `len` copies of `fill` for each `(fill, len)` segment.
    fn laced(prefix: &[u8], segments: &[(u8, usize)]) -> Vec<u8> {
        let mut buf = prefix.to_vec();
        for &(fill, len) in segments {
            let new_len = buf.len() + len;
            buf.resize(new_len, fill);
        }
        buf
    }

    #[test]
    fn xiph_lacing_parses_simple_three_segment_buffer() {
        // Header lengths 30, 19, 5: prefix [2, 30, 19], then 54 payload bytes.
        let buf = laced(&[2, 30, 19], &[(0xAA, 30), (0xBB, 19), (0xCC, 5)]);
        let (a, b, c) = parse_xiph_lacing(&buf).expect("parses");
        assert_eq!(a.len(), 30);
        assert_eq!(b.len(), 19);
        assert_eq!(c.len(), 5);
        assert!(a.iter().all(|x| *x == 0xAA));
        assert!(b.iter().all(|x| *x == 0xBB));
        assert!(c.iter().all(|x| *x == 0xCC));
    }

    #[test]
    fn xiph_lacing_handles_long_runs() {
        // Length-260 segment encodes as 0xFF 0x05 (255 + 5).
        let buf = laced(&[2, 0xFF, 0x05, 0x10], &[(0, 260), (1, 16), (2, 8)]);
        let (a, b, c) = parse_xiph_lacing(&buf).expect("parses");
        assert_eq!(a.len(), 260);
        assert_eq!(b.len(), 16);
        assert_eq!(c.len(), 8);
    }

    #[test]
    fn xiph_lacing_rejects_wrong_header_count() {
        let buf = vec![1u8, 5, 5];
        assert!(parse_xiph_lacing(&buf).is_err());
    }

    #[test]
    fn xiph_lacing_rejects_truncated_buffer() {
        // Lacing claims 30 + 19 bytes but only 2 payload bytes follow.
        let buf = vec![2u8, 30, 19, 0, 0];
        assert!(parse_xiph_lacing(&buf).is_err());
    }

    #[test]
    fn vorbis_decoder_rejects_missing_extra_data() {
        let r = VorbisDecoder::new(None, 44100, 2);
        assert!(matches!(r, Err(AudioError::Decode(_))));
    }

    #[test]
    fn vorbis_decoder_rejects_malformed_lacing() {
        // Lacing claims 30 + 19 bytes but only 6 follow, so construction
        // fails in the lacing parser before any header is read.
        let extra = vec![2u8, 30, 19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
        let r = VorbisDecoder::new(Some(&extra), 44100, 2);
        assert!(matches!(r, Err(AudioError::Decode(_))));
    }

    #[test]
    fn vorbis_decoder_rejects_garbage_extra_data() {
        // Well-formed lacing (30 + 19 + 5 bytes all present) around headers
        // that are pure 0xFF, so the lacing parser succeeds and it is the
        // ident header parser that bails.
        let extra = laced(&[2, 30, 19], &[(0xFF, 30), (0xFF, 19), (0xFF, 5)]);
        assert!(parse_xiph_lacing(&extra).is_ok());
        let r = VorbisDecoder::new(Some(&extra), 44100, 2);
        assert!(matches!(r, Err(AudioError::Decode(_))));
    }
}