Skip to main content

container/avi/
mod.rs

1//! Minimal AVI (RIFF) demuxer.
2//!
3//! Scope: read the video stream out of a one-video-track AVI file, map
4//! the stream's handler / fourcc to one of the codec labels the
5//! transcoder knows how to decode, and emit per-frame samples in the
6//! order the file lays them down (presentation order — AVI does not
7//! have B-frame reordering at the container layer, stream samples are
8//! already display-order). Secondary audio tracks are dropped with a
9//! warning; the caller already handles that shape for MP4/MKV.
10//!
11//! OpenDML 1.0 super-indexes (Squad-38, 2026-04-17): files >1 GiB use
12//! multiple `LIST movi` chunks (one per ~1 GiB RIFF segment) plus an
13//! `indx` superindex chunk per stream that points at per-LIST `ix##`
14//! standard indexes. Detection happens at construction: presence of an
15//! `indx` chunk inside the video stream's `LIST strl` triggers the
16//! OpenDML path, which precomputes a sample chunk-offset list from the
17//! `indx` → `ix##` chain and `next_video_sample()` consumes that. When
18//! `indx` is absent we fall back to the legacy single-`movi` cursor walk.
19//! `dmlh.dwTotalFrames` from the `LIST odml` (sibling of `strl` LISTs in
20//! `hdrl`) supersedes `avih.dwTotalFrames` for OpenDML files because
//! `avih` is a 32-bit field and gets truncated for clips longer than
//! `2^32 / fps` seconds (i.e. more than 2^32 frames).
23//!
24//! What's intentionally not supported:
25//! - Audio passthrough. AVI audio is usually MP3 or AC-3 anyway, not
26//!   AAC — outside the passthrough scope.
27//! - Variable-bitrate index reconstruction. We trust the sample order
28//!   in the `movi` LIST itself; `idx1` is only used as a fallback when
29//!   `movi` is missing (which real-world files don't exhibit).
30
31mod riff;
32mod opendml;
33mod streaming;
34
35#[cfg(test)]
36mod tests;
37
38pub use streaming::AviStreamingDemuxer;
39pub(crate) use streaming::demux_avi_streaming_init;
40
41use anyhow::{Context, Result, bail};
42use codec::frame::{ColorSpace, PixelFormat, StreamInfo};
43use crate::demux::DemuxResult;
44use opendml::read_dmlh_total_frames;
45use riff::{ascii, collect_movi_samples, find_video_stream, fourcc_to_codec,
46           scan_top_level_records};
47
48pub(crate) fn demux_avi(data: &[u8]) -> Result<DemuxResult> {
49    // RIFF header: "RIFF" u32-LE size "AVI ".  (Guaranteed present by
50    // the dispatch in `demux::detect_container`, but re-validate in
51    // case this is ever called directly.)
52    if data.len() < 12 || &data[..4] != b"RIFF" || &data[8..12] != b"AVI " {
53        bail!("not a RIFF/AVI file");
54    }
55
56    // The RIFF payload begins after the 12-byte header. From there we
57    // see a sequence of LIST/chunk records and optionally further
58    // top-level RIFF chunks (OpenDML 1.0 — multi-`movi` files split
59    // every ~1 GiB into a fresh `RIFF AVIX` segment that itself
60    // contains a `LIST movi`). The records we care about:
61    //   LIST hdrl  -> avih + (LIST strl { strh + strf [+ indx] })*
62    //                 + LIST odml { dmlh }
63    //   LIST movi  -> stream sample chunks (##dc / ##db / ##wb)
64    // For OpenDML the file is `RIFF AVI ` ... `RIFF AVIX` ... `RIFF AVIX` ...
65    // We scan the entire file for every `LIST movi` regardless of
66    // which RIFF segment it lives in.
67    let mut hdrl: Option<(usize, usize)> = None;
68    let mut movi_lists: Vec<(usize, usize)> = Vec::new();
69    scan_top_level_records(data, &mut hdrl, &mut movi_lists);
70
71    let (hdrl_start, hdrl_end) = hdrl.context("AVI: missing hdrl LIST")?;
72    if movi_lists.is_empty() {
73        bail!("AVI: missing movi LIST");
74    }
75
76    // Walk hdrl looking for the video stream's strl LIST. strl contains
77    // strh (stream header: type, handler fourcc) and strf (stream
78    // format: BITMAPINFOHEADER for video).
79    let video = find_video_stream(&data[hdrl_start..hdrl_end])
80        .context("AVI: no video stream found in hdrl")?;
81
82    let codec = fourcc_to_codec(&video.handler)
83        .or_else(|| fourcc_to_codec(&video.compression))
84        .with_context(|| {
85            format!(
86                "AVI: unsupported video fourcc {:?}/{:?}",
87                ascii(&video.handler),
88                ascii(&video.compression)
89            )
90        })?;
91
92    // Stream-id prefix for this video stream's sample chunks in movi.
93    // Two ASCII digits (stream index, zero-padded) + 'd' for 'dc' / 'b'
94    // for 'db'. E.g. stream 0 gives `00dc` (compressed DIB frame) or
95    // `00db` (uncompressed DIB frame).
96    let stream_idx = video.stream_index;
97    let prefix = format!("{:02}", stream_idx);
98
99    // Walk every movi LIST in file order (OpenDML splits one logical
100    // movi across multiple RIFF AVIX segments). For each LIST, pull
101    // every chunk whose fourcc starts with `<prefix>d` into samples in
102    // order. Non-video chunks (audio `##wb`, JUNK, `rec ` LISTs for
103    // OpenDML) are skipped.
104    let mut samples: Vec<Vec<u8>> = Vec::new();
105    for &(movi_start, movi_end) in &movi_lists {
106        collect_movi_samples(&data[movi_start..movi_end], &prefix, &mut samples)?;
107    }
108
109    if samples.is_empty() {
110        bail!(
111            "AVI: movi LIST contained no video samples for stream {:02}",
112            stream_idx
113        );
114    }
115
116    // Prefer dmlh.dwTotalFrames over the materialized sample count when
117    // OpenDML is present — for >1 GiB files, dmlh is the spec-mandated
118    // accurate count; avih.dwTotalFrames is u32 and may have wrapped.
119    // Falling back to samples.len() preserves legacy behaviour for
120    // single-`movi` files without an odml LIST.
121    let total_frames =
122        read_dmlh_total_frames(&data[hdrl_start..hdrl_end]).unwrap_or(samples.len() as u64);
123    let duration = if video.frame_rate > 0.0 {
124        total_frames as f64 / video.frame_rate
125    } else {
126        0.0
127    };
128
129    let info = StreamInfo {
130        codec: codec.clone(),
131        width: video.width,
132        height: video.height,
133        frame_rate: video.frame_rate,
134        duration,
135        // AVI's BITMAPINFOHEADER does not carry a spec-grade pixel
136        // format — the fourcc implies 4:2:0 for the codecs we
137        // actually support (MPEG-4 Part 2 / DivX / H.264 Baseline
138        // in AVI). Downstream `pixel_format::detect` can refine
139        // this once a codec-level parse runs.
140        pixel_format: PixelFormat::Yuv420p,
141        color_space: ColorSpace::Bt709,
142        color_metadata: Default::default(),
143        total_frames,
144        // AVI's dwRate/dwScale in strh gives a bps for audio, not
145        // video. Real video bitrate requires codec-level inspection,
146        // which we punt on. 0 means "unknown" to the pipeline — same
147        // posture as the MKV demuxer.
148        bitrate: 0,
149    };
150
151    // Refine pixel_format from the bitstream now that we have samples.
152    let detected_pf = codec::pixel_format::detect(&codec, &samples);
153    let info = StreamInfo {
154        pixel_format: detected_pf,
155        ..info
156    };
157
158    Ok(DemuxResult {
159        codec,
160        info,
161        samples,
162        audio: None,
163    })
164}