rivet-container 0.2.0

Clean-room container demux (MP4/MKV/TS/AVI) and mux (AV1 MP4, CMAF, HLS) for the rivet transcoder, no FFmpeg. Imported as `container`.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
//! Streaming MPEG-TS demuxer (`TsStreamingDemuxer`).
//!
//! Holds the PES reassembly buffer for exactly one in-flight access unit;
//! yields a sample whenever a PUSI=1 packet closes the current one (or at
//! EOF for the final pending sample).

use anyhow::{Context, Result, bail};
use codec::frame::{ColorSpace, PixelFormat, StreamInfo};

use crate::demux::AudioTrack;
use crate::streaming::{DemuxHeader, Sample, StreamingDemuxer};

use super::{
    ProgramInfo, PatProgram,
    STREAM_TYPE_H264, STREAM_TYPE_HEVC, STREAM_TYPE_MPEG2_VIDEO,
    TS_PACKET, TS_SYNC,
};
use super::audio::extract_ts_audio;
use super::framerate::estimate_frame_rate_from_ptses;
use super::pat_pmt::{parse_pat_all_programs, parse_pmt_streams};
use super::pes::{parse_pes_header, scan_first_video_au};

/// Streaming MPEG-TS demuxer. Holds the PES reassembly buffer for one
/// in-flight access unit only — yields whenever a PUSI=1 packet
/// closes the current sample (or at EOF for the final pending sample).
///
/// Squad-37 added:
/// - **Multi-program awareness**: `programs()` returns every program
///   the PAT advertised plus their PMT contents; `select_program()`
///   switches the active video PID + audio extraction to a different
///   program. Default behaviour is unchanged (first program with a
///   recognised video stream wins).
/// - **Encrypted-stream guard**: if any packet on the active video PID
///   carries `transport_scrambling_control != 0`, we log a one-time
///   typed warn ("encrypted TS stream; we don't carry CA tables —
///   drop video output") and switch to a "drop everything" mode where
///   `next_video_sample` returns `Ok(None)` without further parsing.
///   Any partially reassembled access unit is discarded at that point.
pub struct TsStreamingDemuxer {
    /// Owned copy of the whole TS input; every packet walk (PAT/PMT
    /// discovery, first-AU probe, audio extraction, sample reassembly)
    /// reads from this buffer.
    data: Vec<u8>,
    /// Cached header handed out by `header()`. `info.pixel_format`
    /// starts as `Yuv420p` and is patched in place when the first
    /// sample is emitted (see `pixel_format_detected`).
    header: DemuxHeader,
    /// Audio track extracted up front from the active program's first
    /// audio stream; `None` when the program has no audio stream or
    /// extraction failed.
    audio: Option<AudioTrack>,
    /// Number of TS packets in `data`, as reported by
    /// `detect_packet_layout`.
    packets: usize,
    /// Byte distance between the starts of consecutive packets.
    packet_stride: usize,
    /// Byte offset added to `i * packet_stride` to reach packet `i`'s
    /// sync byte; the packet itself is the following `TS_PACKET` bytes.
    prefix_len: usize,
    /// Every program the PAT advertised, in PAT order. Each entry's
    /// PMT was walked at init to populate its video/audio stream lists.
    /// Programs whose PMT we couldn't parse are still listed (with
    /// empty video_streams/audio_streams) so callers see them.
    programs: Vec<ProgramInfo>,
    /// Index into `programs` of the currently active program. Default:
    /// the first program with a recognised video stream.
    active_program_idx: usize,
    /// Active video PID (mirrors `programs[active_program_idx].video_streams[0].pid`).
    video_pid: u16,
    /// Index of the next packet to scan.
    next_pkt: usize,
    /// In-flight PES payload — emptied & yielded on next PUSI.
    pending: Vec<u8>,
    /// PTS attached to `pending` (PTS lives in the PES header that
    /// opened the AU). `None` when that PES header carried no PTS; such
    /// samples are emitted with `pts_ticks == 0`.
    pending_pts: Option<u64>,
    /// True once we've seen the first PUSI for our PID. Bytes before
    /// the first PUSI are dropped (mid-stream join semantics). Reset to
    /// false when a PUSI packet carries a PES header we cannot parse,
    /// so payload is dropped again until the next good PUSI.
    have_first_start: bool,
    /// Set once the packet walk is finished: either the last packet has
    /// been consumed (this happens on the same call that drains the
    /// final pending sample, if there is one) or the encrypted-stream
    /// guard latched. Every later `next_video_sample` call returns
    /// `Ok(None)` immediately. Cleared again by `select_program`.
    eof: bool,
    /// Lazily set on first emitted sample — `pixel_format::detect` is
    /// run once against that first sample, so we patch
    /// `header.info.pixel_format` in place once and skip the probe
    /// thereafter.
    pixel_format_detected: bool,
    /// Encrypted-stream guard (Squad-37). Latches `true` the first time
    /// we see `transport_scrambling_control != 0` on the active video
    /// PID; warning is logged exactly once and `next_video_sample`
    /// returns `Ok(None)` from that point on.
    encrypted_drop: bool,
}

pub(crate) fn demux_ts_streaming_init(data: &[u8]) -> Result<TsStreamingDemuxer> {
    let owned = data.to_vec();
    let (packets, packet_stride, prefix_len) = super::detect_packet_layout(&owned)?;
    if packets == 0 {
        bail!("TS: file contains no TS packets");
    }

    // Phase 1: walk the PAT and collect every program + its PMT PID.
    let mut pat_programs: Vec<PatProgram> = Vec::new();
    for i in 0..packets {
        let start = i * packet_stride + prefix_len;
        let pkt = &owned[start..start + TS_PACKET];
        if pkt[0] != TS_SYNC {
            continue;
        }
        let pid = (((pkt[1] & 0x1F) as u16) << 8) | pkt[2] as u16;
        if pid == 0
            && let Some(payload) = super::ts_psi_payload(pkt)
        {
            let progs = parse_pat_all_programs(payload);
            if !progs.is_empty() {
                pat_programs = progs;
                break;
            }
        }
    }
    if pat_programs.is_empty() {
        bail!("TS: no PAT entries found");
    }

    // Phase 2: walk every PMT and resolve its video+audio streams. We
    // remember the FIRST PMT section we see per PID — later versions
    // (table_id 0x02 with a higher `version_number`) would update an
    // active session in a real-world receiver but our demuxer is
    // start-of-file-only, so first-section semantics are correct.
    let mut programs: Vec<ProgramInfo> = pat_programs
        .iter()
        .map(|p| ProgramInfo {
            program_number: p.program_number,
            pmt_pid: p.pmt_pid,
            video_streams: Vec::new(),
            audio_streams: Vec::new(),
        })
        .collect();
    // Track which programs still need their PMT parsed.
    let mut need: std::collections::HashSet<u16> =
        pat_programs.iter().map(|p| p.pmt_pid).collect();
    for i in 0..packets {
        if need.is_empty() {
            break;
        }
        let start = i * packet_stride + prefix_len;
        let pkt = &owned[start..start + TS_PACKET];
        if pkt[0] != TS_SYNC {
            continue;
        }
        let pid = (((pkt[1] & 0x1F) as u16) << 8) | pkt[2] as u16;
        if !need.contains(&pid) {
            continue;
        }
        if let Some(payload) = super::ts_psi_payload(pkt)
            && let Some((video_streams, audio_streams)) = parse_pmt_streams(payload)
        {
            if let Some(prog) = programs.iter_mut().find(|p| p.pmt_pid == pid) {
                prog.video_streams = video_streams;
                prog.audio_streams = audio_streams;
            }
            need.remove(&pid);
        }
    }

    // Phase 3: pick the default active program — first one with a
    // recognised video stream. Matches legacy "first program wins"
    // semantics for single-program files.
    let active_program_idx = programs
        .iter()
        .position(|p| !p.video_streams.is_empty())
        .context("TS: no program advertises a recognised video elementary stream")?;
    let active = &programs[active_program_idx];
    let video = active.video_streams[0];
    let audio = active.audio_streams.first().copied();
    let codec = match video.stream_type {
        STREAM_TYPE_MPEG2_VIDEO => "mpeg2",
        STREAM_TYPE_H264 => "h264",
        STREAM_TYPE_HEVC => "h265",
        other => bail!("TS: unsupported stream_type 0x{:02X}", other),
    }
    .to_string();

    // total_frames + duration are unknown until drained.
    //
    // width/height recovery: TS carries nothing at the container layer,
    // so we walk just enough packets to capture the first video AU and
    // parse its SPS (H.264 / HEVC) or sequence header (MPEG-2). This
    // has to happen during init — `header()` is read by the pipeline
    // before any `next_video_sample` call, and the rav1e encoder
    // rejects 0×0 configs outright. Parse failure is non-fatal: we
    // warn and leave dims at 0 so the failure surfaces in the encoder
    // config error rather than silently corrupting the output.
    //
    // frame_rate: same scan collects a window of video-PID PTSes (up
    // to 64 PUSIs). Inter-PTS span over (count-1) intervals at the
    // 90 kHz TS clock gives the source fps. A wrong frame_rate here
    // causes exactly the kind of "video sped up, audio drags" sync
    // symptom that the BBB 24 fps sample hit against the previous
    // hardcoded `30.0` fallback. Falls back to `30.0` only when the
    // scan can't derive a finite fps in [1.0, 240.0].
    let scan = scan_first_video_au(&owned, packets, packet_stride, prefix_len, video.pid, 64);
    let (width, height) = match &scan.first_au {
        Some(au) => {
            codec::pixel_format::detect_dims(&codec, std::slice::from_ref(au)).unwrap_or_else(
                || {
                    tracing::warn!(
                        codec = codec.as_str(),
                        video_pid = video.pid,
                        "TS streaming demux: first AU SPS parse failed; width/height=0×0"
                    );
                    (0, 0)
                },
            )
        }
        None => {
            tracing::warn!(
                codec = codec.as_str(),
                video_pid = video.pid,
                "TS streaming demux: could not locate first video AU during init; width/height=0×0"
            );
            (0, 0)
        }
    };
    let frame_rate = estimate_frame_rate_from_ptses(&scan.ptses).unwrap_or_else(|| {
        tracing::warn!(
            codec = codec.as_str(),
            video_pid = video.pid,
            pts_samples = scan.ptses.len(),
            "TS streaming demux: could not derive frame_rate from PTS window; defaulting to 30.0"
        );
        30.0
    });

    let info = StreamInfo {
        codec: codec.clone(),
        width,
        height,
        frame_rate,
        duration: 0.0,
        pixel_format: PixelFormat::Yuv420p,
        color_space: ColorSpace::Bt709,
        total_frames: 0,
        bitrate: 0,
        color_metadata: Default::default(),
    };

    // Audio passthrough still happens up-front (Squad-18 contract).
    // Squad-37 routes by codec kind (AAC / AC-3 / E-AC-3).
    let audio_track = audio.and_then(|info| {
        match extract_ts_audio(&owned, packets, packet_stride, prefix_len, info) {
            Ok(track) => track,
            Err(e) => {
                tracing::warn!(
                    audio_pid = info.pid,
                    audio_kind = ?info.kind,
                    error = %e,
                    "TS audio extraction failed; emitting video-only"
                );
                None
            }
        }
    });

    Ok(TsStreamingDemuxer {
        data: owned,
        header: DemuxHeader { codec, info },
        audio: audio_track,
        packets,
        packet_stride,
        prefix_len,
        programs,
        active_program_idx,
        video_pid: video.pid,
        next_pkt: 0,
        pending: Vec::new(),
        pending_pts: None,
        have_first_start: false,
        eof: false,
        pixel_format_detected: false,
        encrypted_drop: false,
    })
}

impl TsStreamingDemuxer {
    /// Every program the PAT advertised, in PAT order. Squad-37 multi-
    /// program API — useful for callers that want to enumerate channels
    /// in a multi-program transport (DVB / ATSC broadcast capture). For
    /// single-program files the slice has length 1.
    pub fn programs(&self) -> &[ProgramInfo] {
        &self.programs
    }

    /// Index of the currently active program (within `programs()`).
    pub fn active_program_index(&self) -> usize {
        self.active_program_idx
    }

    /// Switch the active program by `program_number` (as listed in
    /// `programs()`). Resets the per-AU walk state (packet cursor,
    /// pending PES bytes, PTS, EOF flag, encrypted-drop guard,
    /// pixel-format detection) so the next `next_video_sample` call
    /// starts cleanly from the first packet on the new video PID.
    ///
    /// Width/height and frame rate are re-probed from the new video
    /// PID. If the probe fails the header falls back to 0×0 / 30 fps and
    /// a warning is logged, so the miss is visible rather than the
    /// previous program's values being silently reused.
    ///
    /// Audio is re-extracted from the new program's first audio stream
    /// (if any). For single-program files (the common case) callers
    /// don't need to touch this; the constructor already selected the
    /// first program with a recognised video stream.
    ///
    /// # Errors
    ///
    /// Returns an error — leaving the demuxer untouched — if
    /// `program_number` is not in `programs()`, if the chosen program
    /// has no recognised video stream, or if its video `stream_type` is
    /// unsupported.
    pub fn select_program(&mut self, program_number: u16) -> Result<()> {
        // --- Validation: nothing on `self` is modified until this passes.
        let new_idx = self
            .programs
            .iter()
            .position(|p| p.program_number == program_number)
            .with_context(|| format!("TS: program_number {} not found in PAT", program_number))?;
        if self.programs[new_idx].video_streams.is_empty() {
            bail!(
                "TS: program {} has no recognised video stream",
                program_number
            );
        }
        let video = self.programs[new_idx].video_streams[0];
        let audio = self.programs[new_idx].audio_streams.first().copied();
        let codec = match video.stream_type {
            STREAM_TYPE_MPEG2_VIDEO => "mpeg2",
            STREAM_TYPE_H264 => "h264",
            STREAM_TYPE_HEVC => "h265",
            other => bail!(
                "TS: program {} video stream_type 0x{:02X} unsupported",
                program_number,
                other
            ),
        }
        .to_string();

        // --- Re-probe width/height + frame_rate from the new video PID.
        // Failures are logged the same way the constructor logs them.
        let scan = scan_first_video_au(
            &self.data,
            self.packets,
            self.packet_stride,
            self.prefix_len,
            video.pid,
            64,
        );
        let (width, height) = match &scan.first_au {
            Some(au) => codec::pixel_format::detect_dims(&codec, std::slice::from_ref(au))
                .unwrap_or_else(|| {
                    tracing::warn!(
                        codec = codec.as_str(),
                        video_pid = video.pid,
                        program_number = program_number,
                        "TS streaming demux: first AU SPS parse failed on program switch; width/height=0×0"
                    );
                    (0, 0)
                }),
            None => {
                tracing::warn!(
                    codec = codec.as_str(),
                    video_pid = video.pid,
                    program_number = program_number,
                    "TS streaming demux: could not locate first video AU on program switch; width/height=0×0"
                );
                (0, 0)
            }
        };
        let frame_rate = estimate_frame_rate_from_ptses(&scan.ptses).unwrap_or_else(|| {
            tracing::warn!(
                codec = codec.as_str(),
                video_pid = video.pid,
                program_number = program_number,
                pts_samples = scan.ptses.len(),
                "TS streaming demux: could not derive frame_rate from PTS window on program switch; defaulting to 30.0"
            );
            30.0
        });

        // --- Commit the switch.
        self.active_program_idx = new_idx;
        self.video_pid = video.pid;
        // `info.codec` flows out of `header()` to the pipeline, so both
        // copies of the codec name are refreshed together.
        self.header.info.codec = codec.clone();
        self.header.codec = codec;
        self.header.info.width = width;
        self.header.info.height = height;
        self.header.info.frame_rate = frame_rate;
        // Pixel format is re-detected from the first sample of the new
        // program.
        self.header.info.pixel_format = PixelFormat::Yuv420p;
        self.pixel_format_detected = false;
        // Reset PES walk state.
        self.next_pkt = 0;
        self.pending.clear();
        self.pending_pts = None;
        self.have_first_start = false;
        self.eof = false;
        self.encrypted_drop = false;

        // Re-extract audio from the new program's first audio stream.
        self.audio = match audio {
            None => None,
            Some(info) => match extract_ts_audio(
                &self.data,
                self.packets,
                self.packet_stride,
                self.prefix_len,
                info,
            ) {
                Ok(track) => track,
                Err(e) => {
                    tracing::warn!(
                        audio_pid = info.pid,
                        audio_kind = ?info.kind,
                        error = %e,
                        "TS audio extraction failed on program switch; emitting video-only"
                    );
                    None
                }
            },
        };
        Ok(())
    }

    /// Build a Sample from raw AU bytes, applying the one-shot
    /// pixel_format detection on the first emission. Centralises the
    /// three yield sites in `next_video_sample`.
    ///
    /// A missing PTS is reported as `pts_ticks == 0`; `duration_ticks`
    /// is always 0 because the per-sample duration is not known here.
    fn yield_sample(&mut self, data: Vec<u8>, pts: Option<u64>) -> Sample {
        if !self.pixel_format_detected {
            let detected =
                codec::pixel_format::detect(&self.header.codec, std::slice::from_ref(&data));
            self.header.info.pixel_format = detected;
            self.pixel_format_detected = true;
        }
        Sample {
            data,
            pts_ticks: pts.map(|p| p as i64).unwrap_or(0),
            duration_ticks: 0,
        }
    }
}

impl StreamingDemuxer for TsStreamingDemuxer {
    /// Cached stream header built at init (and refreshed by
    /// `select_program`).
    fn header(&self) -> &DemuxHeader {
        &self.header
    }

    /// Walk forward through the TS packets until one complete access
    /// unit on the active video PID can be handed out.
    ///
    /// An AU is complete when the next PUSI=1 packet on the same PID
    /// arrives, or when the packet list is exhausted. Returns
    /// `Ok(None)` once the stream is drained or after the
    /// encrypted-stream guard has latched.
    fn next_video_sample(&mut self) -> Result<Option<Sample>> {
        if self.eof || self.encrypted_drop {
            return Ok(None);
        }

        while self.next_pkt < self.packets {
            let base = self.next_pkt * self.packet_stride + self.prefix_len;
            self.next_pkt += 1;
            let pkt = &self.data[base..base + TS_PACKET];

            // Skip anything that is not a synced packet on our PID.
            if pkt[0] != TS_SYNC {
                continue;
            }
            let pid = (u16::from(pkt[1] & 0x1F) << 8) | u16::from(pkt[2]);
            if pid != self.video_pid {
                continue;
            }

            let scramble = (pkt[3] >> 6) & 0x03;
            if scramble != 0 {
                // Encrypted-stream guard (Squad-37). We carry no CA
                // tables, so everything from the first scrambled packet
                // on is garbage: warn once, throw away the in-flight AU
                // and latch into drop-everything mode so later calls
                // return `Ok(None)` straight away.
                tracing::warn!(
                    video_pid = self.video_pid,
                    transport_scrambling_control = scramble,
                    error_kind = "encrypted_ts",
                    "encrypted TS stream; we don't carry CA tables — drop video output"
                );
                self.encrypted_drop = true;
                self.pending.clear();
                self.pending_pts = None;
                self.have_first_start = false;
                self.eof = true;
                return Ok(None);
            }

            let starts_unit = pkt[1] & 0x40 != 0;
            let afc = (pkt[3] >> 4) & 0x03;
            if afc & 0x01 == 0 {
                // Adaptation-only packet: nothing to append.
                continue;
            }
            // Payload begins after the 4-byte header and, when present,
            // the length byte plus body of the adaptation field.
            let payload_at = if afc & 0x02 != 0 {
                5 + pkt[4] as usize
            } else {
                4
            };
            if payload_at >= TS_PACKET {
                continue;
            }
            let payload = &pkt[payload_at..];

            if !starts_unit {
                // Continuation packet: only meaningful once an AU has
                // been opened by a PUSI.
                if self.have_first_start {
                    self.pending.extend_from_slice(payload);
                }
                continue;
            }

            // PUSI: whatever was being reassembled is now complete.
            let finished = if self.have_first_start {
                std::mem::take(&mut self.pending)
            } else {
                Vec::new()
            };
            let finished_pts = self.pending_pts.take();

            match parse_pes_header(payload) {
                Some((es_start, pts)) => {
                    // Stage the newly opened AU for the next call.
                    self.have_first_start = true;
                    self.pending_pts = pts;
                    if es_start < payload.len() {
                        self.pending.extend_from_slice(&payload[es_start..]);
                    }
                }
                None => {
                    // Malformed PES — forget the new AU and wait for
                    // the next PUSI before collecting bytes again.
                    self.have_first_start = false;
                    self.pending.clear();
                }
            }

            if !finished.is_empty() {
                return Ok(Some(self.yield_sample(finished, finished_pts)));
            }
            // Nothing to hand out yet — keep walking.
        }

        // Packet list exhausted: flush the last AU, if there is one.
        self.eof = true;
        if self.pending.is_empty() {
            return Ok(None);
        }
        let last = std::mem::take(&mut self.pending);
        let last_pts = self.pending_pts.take();
        Ok(Some(self.yield_sample(last, last_pts)))
    }

    /// Audio track extracted for the active program, if any.
    fn audio(&self) -> Option<&AudioTrack> {
        self.audio.as_ref()
    }
}