Skip to main content

container/
avi.rs

1//! Minimal AVI (RIFF) demuxer.
2//!
3//! Scope: read the video stream out of a one-video-track AVI file, map
4//! the stream's handler / fourcc to one of the codec labels the
5//! transcoder knows how to decode, and emit per-frame samples in the
6//! order the file lays them down (presentation order — AVI does not
7//! have B-frame reordering at the container layer, stream samples are
8//! already display-order). Secondary audio tracks are dropped with a
9//! warning; the caller already handles that shape for MP4/MKV.
10//!
11//! OpenDML 1.0 super-indexes (Squad-38, 2026-04-17): files >1 GiB use
12//! multiple `LIST movi` chunks (one per ~1 GiB RIFF segment) plus an
13//! `indx` superindex chunk per stream that points at per-LIST `ix##`
14//! standard indexes. Detection happens at construction: presence of an
15//! `indx` chunk inside the video stream's `LIST strl` triggers the
16//! OpenDML path, which precomputes a sample chunk-offset list from the
17//! `indx` → `ix##` chain and `next_video_sample()` consumes that. When
18//! `indx` is absent we fall back to the legacy single-`movi` cursor walk.
19//! `dmlh.dwTotalFrames` from the `LIST odml` (sibling of `strl` LISTs in
20//! `hdrl`) supersedes `avih.dwTotalFrames` for OpenDML files because
//! `avih` is a 32-bit field and gets truncated for clips longer than
//! `2^32` frames (i.e. longer than `2^32 / fps` seconds).
23//!
24//! What's intentionally not supported:
25//! - Audio passthrough. AVI audio is usually MP3 or AC-3 anyway, not
26//!   AAC — outside the passthrough scope.
27//! - Variable-bitrate index reconstruction. We trust the sample order
28//!   in the `movi` LIST itself; `idx1` is only used as a fallback when
29//!   `movi` is missing (which real-world files don't exhibit).
30
31use anyhow::{Context, Result, bail};
32use codec::frame::{ColorSpace, PixelFormat, StreamInfo};
33
34use crate::demux::{AudioTrack, DemuxResult};
35use crate::streaming::{DemuxHeader, Sample, StreamingDemuxer};
36
37pub(crate) fn demux_avi(data: &[u8]) -> Result<DemuxResult> {
38    // RIFF header: "RIFF" u32-LE size "AVI ".  (Guaranteed present by
39    // the dispatch in `demux::detect_container`, but re-validate in
40    // case this is ever called directly.)
41    if data.len() < 12 || &data[..4] != b"RIFF" || &data[8..12] != b"AVI " {
42        bail!("not a RIFF/AVI file");
43    }
44
45    // The RIFF payload begins after the 12-byte header. From there we
46    // see a sequence of LIST/chunk records and optionally further
47    // top-level RIFF chunks (OpenDML 1.0 — multi-`movi` files split
48    // every ~1 GiB into a fresh `RIFF AVIX` segment that itself
49    // contains a `LIST movi`). The records we care about:
50    //   LIST hdrl  -> avih + (LIST strl { strh + strf [+ indx] })*
51    //                 + LIST odml { dmlh }
52    //   LIST movi  -> stream sample chunks (##dc / ##db / ##wb)
53    // For OpenDML the file is `RIFF AVI ` ... `RIFF AVIX` ... `RIFF AVIX` ...
54    // We scan the entire file for every `LIST movi` regardless of
55    // which RIFF segment it lives in.
56    let mut hdrl: Option<(usize, usize)> = None;
57    let mut movi_lists: Vec<(usize, usize)> = Vec::new();
58    scan_top_level_records(data, &mut hdrl, &mut movi_lists);
59
60    let (hdrl_start, hdrl_end) = hdrl.context("AVI: missing hdrl LIST")?;
61    if movi_lists.is_empty() {
62        bail!("AVI: missing movi LIST");
63    }
64
65    // Walk hdrl looking for the video stream's strl LIST. strl contains
66    // strh (stream header: type, handler fourcc) and strf (stream
67    // format: BITMAPINFOHEADER for video).
68    let video = find_video_stream(&data[hdrl_start..hdrl_end])
69        .context("AVI: no video stream found in hdrl")?;
70
71    let codec = fourcc_to_codec(&video.handler)
72        .or_else(|| fourcc_to_codec(&video.compression))
73        .with_context(|| {
74            format!(
75                "AVI: unsupported video fourcc {:?}/{:?}",
76                ascii(&video.handler),
77                ascii(&video.compression)
78            )
79        })?;
80
81    // Stream-id prefix for this video stream's sample chunks in movi.
82    // Two ASCII digits (stream index, zero-padded) + 'd' for 'dc' / 'b'
83    // for 'db'. E.g. stream 0 gives `00dc` (compressed DIB frame) or
84    // `00db` (uncompressed DIB frame).
85    let stream_idx = video.stream_index;
86    let prefix = format!("{:02}", stream_idx);
87
88    // Walk every movi LIST in file order (OpenDML splits one logical
89    // movi across multiple RIFF AVIX segments). For each LIST, pull
90    // every chunk whose fourcc starts with `<prefix>d` into samples in
91    // order. Non-video chunks (audio `##wb`, JUNK, `rec ` LISTs for
92    // OpenDML) are skipped.
93    let mut samples: Vec<Vec<u8>> = Vec::new();
94    for &(movi_start, movi_end) in &movi_lists {
95        collect_movi_samples(&data[movi_start..movi_end], &prefix, &mut samples)?;
96    }
97
98    if samples.is_empty() {
99        bail!(
100            "AVI: movi LIST contained no video samples for stream {:02}",
101            stream_idx
102        );
103    }
104
105    // Prefer dmlh.dwTotalFrames over the materialized sample count when
106    // OpenDML is present — for >1 GiB files, dmlh is the spec-mandated
107    // accurate count; avih.dwTotalFrames is u32 and may have wrapped.
108    // Falling back to samples.len() preserves legacy behaviour for
109    // single-`movi` files without an odml LIST.
110    let total_frames =
111        read_dmlh_total_frames(&data[hdrl_start..hdrl_end]).unwrap_or(samples.len() as u64);
112    let duration = if video.frame_rate > 0.0 {
113        total_frames as f64 / video.frame_rate
114    } else {
115        0.0
116    };
117
118    let info = StreamInfo {
119        codec: codec.clone(),
120        width: video.width,
121        height: video.height,
122        frame_rate: video.frame_rate,
123        duration,
124        // AVI's BITMAPINFOHEADER does not carry a spec-grade pixel
125        // format — the fourcc implies 4:2:0 for the codecs we
126        // actually support (MPEG-4 Part 2 / DivX / H.264 Baseline
127        // in AVI). Downstream `pixel_format::detect` can refine
128        // this once a codec-level parse runs.
129        pixel_format: PixelFormat::Yuv420p,
130        color_space: ColorSpace::Bt709,
131        color_metadata: Default::default(),
132        total_frames,
133        // AVI's dwRate/dwScale in strh gives a bps for audio, not
134        // video. Real video bitrate requires codec-level inspection,
135        // which we punt on. 0 means "unknown" to the pipeline — same
136        // posture as the MKV demuxer.
137        bitrate: 0,
138    };
139
140    // Refine pixel_format from the bitstream now that we have samples.
141    let detected_pf = codec::pixel_format::detect(&codec, &samples);
142    let info = StreamInfo {
143        pixel_format: detected_pf,
144        ..info
145    };
146
147    Ok(DemuxResult {
148        codec,
149        info,
150        samples,
151        audio: None,
152    })
153}
154
/// Description of one video stream, as parsed from a `LIST strl`
/// (`strh` stream header + `strf` BITMAPINFOHEADER) by `parse_strl`.
#[derive(Debug)]
struct VideoStream {
    /// Zero-based position of this stream's `LIST strl` among all
    /// `LIST strl` records in `hdrl` (non-video streams count too).
    /// Formatted as two digits, it is the prefix of the stream's
    /// sample chunks in `movi`.
    stream_index: u32,
    /// fccHandler from strh — usually the compressor identifier
    /// (DIV3/DIVX/DX50/XVID for Part 2, H264/X264 for AVC, etc.).
    handler: [u8; 4],
    /// biCompression from strf's BITMAPINFOHEADER — sometimes the
    /// clearer codec tag when strh.fccHandler has been rewritten by
    /// editors to something generic like `vids` or zero.
    compression: [u8; 4],
    /// Absolute value of `biWidth` from strf.
    width: u32,
    /// Absolute value of `biHeight` from strf (the sign is discarded).
    height: u32,
    /// `dwRate / dwScale` from strh; 30.0 when `dwScale` is zero.
    frame_rate: f64,
}
169
170fn find_video_stream(hdrl: &[u8]) -> Option<VideoStream> {
171    // hdrl contains avih (MainAVIHeader) followed by one LIST strl per
172    // stream. We're only interested in the first video stream.
173    let mut pos = 0;
174    let mut stream_idx: u32 = 0;
175    while pos + 8 <= hdrl.len() {
176        let size = u32::from_le_bytes([hdrl[pos + 4], hdrl[pos + 5], hdrl[pos + 6], hdrl[pos + 7]])
177            as usize;
178        let fourcc = &hdrl[pos..pos + 4];
179        let payload_start = pos + 8;
180        let payload_end = payload_start + size;
181        if payload_end > hdrl.len() {
182            break;
183        }
184        if fourcc == b"LIST" && payload_start + 4 <= payload_end {
185            let list_type = &hdrl[payload_start..payload_start + 4];
186            if list_type == b"strl" {
187                let strl = &hdrl[payload_start + 4..payload_end];
188                if let Some(v) = parse_strl(strl, stream_idx) {
189                    return Some(v);
190                }
191                stream_idx += 1;
192            }
193        }
194        pos = payload_end + (payload_end & 1);
195    }
196    None
197}
198
199fn parse_strl(strl: &[u8], stream_index: u32) -> Option<VideoStream> {
200    // strl contains strh + strf (+ optionally strd, JUNK). strh layout:
201    //   fccType          u32 ("vids"/"auds"/...)
202    //   fccHandler       u32
203    //   dwFlags          u32
204    //   wPriority wLang  u16 u16
205    //   dwInitialFrames  u32
206    //   dwScale          u32   <-- frame rate = dwRate / dwScale
207    //   dwRate           u32
208    //   dwStart          u32
209    //   dwLength         u32
210    //   dwSuggestedBufSize u32
211    //   dwQuality dwSampleSize u32 u32
212    //   rcFrame          i16 i16 i16 i16
213    let mut strh: Option<&[u8]> = None;
214    let mut strf: Option<&[u8]> = None;
215    let mut pos = 0;
216    while pos + 8 <= strl.len() {
217        let fourcc = &strl[pos..pos + 4];
218        let size = u32::from_le_bytes([strl[pos + 4], strl[pos + 5], strl[pos + 6], strl[pos + 7]])
219            as usize;
220        let end = pos + 8 + size;
221        if end > strl.len() {
222            break;
223        }
224        let body = &strl[pos + 8..end];
225        if fourcc == b"strh" {
226            strh = Some(body);
227        } else if fourcc == b"strf" {
228            strf = Some(body);
229        }
230        pos = end + (end & 1);
231    }
232    let strh = strh?;
233    let strf = strf?;
234    if strh.len() < 32 {
235        return None;
236    }
237    let fcc_type: [u8; 4] = strh[0..4].try_into().ok()?;
238    if &fcc_type != b"vids" {
239        return None;
240    }
241    let handler: [u8; 4] = strh[4..8].try_into().ok()?;
242    let scale = u32::from_le_bytes([strh[20], strh[21], strh[22], strh[23]]);
243    let rate = u32::from_le_bytes([strh[24], strh[25], strh[26], strh[27]]);
244    let frame_rate = if scale > 0 {
245        rate as f64 / scale as f64
246    } else {
247        30.0
248    };
249
250    // BITMAPINFOHEADER (strf for vids):
251    //   biSize, biWidth (i32), biHeight (i32), biPlanes u16, biBitCount u16,
252    //   biCompression u32 (fourcc), biSizeImage, biXPelsPerMeter,
253    //   biYPelsPerMeter, biClrUsed, biClrImportant
254    if strf.len() < 20 {
255        return None;
256    }
257    let width = i32::from_le_bytes([strf[4], strf[5], strf[6], strf[7]]).unsigned_abs();
258    let height = i32::from_le_bytes([strf[8], strf[9], strf[10], strf[11]]).unsigned_abs();
259    let compression: [u8; 4] = strf[16..20].try_into().ok()?;
260
261    Some(VideoStream {
262        stream_index,
263        handler,
264        compression,
265        width,
266        height,
267        frame_rate,
268    })
269}
270
/// Translate an AVI fourcc (either `strh.fccHandler` or
/// `strf.biCompression`) into the codec label used by the decoder
/// factory. ASCII letters are compared case-insensitively, so
/// "xvid", "XVID" and "XviD" are equivalent. Unknown fourccs give
/// `None`; the caller reports both fourccs it tried.
fn fourcc_to_codec(fcc: &[u8; 4]) -> Option<String> {
    let mut upper = *fcc;
    upper.make_ascii_uppercase();

    let label = match &upper {
        // MPEG-4 Part 2 family (DivX / XviD and friends).
        b"DIVX" | b"DX50" | b"XVID" | b"DIV3" | b"DIV4" | b"DIV5" | b"DIV6" | b"MP4V"
        | b"MP4S" | b"M4S2" | b"FMP4" | b"DM4V" | b"3IVX" | b"3IV2" | b"XVIX" => "mpeg4",
        // H.264 carried in AVI.
        b"H264" | b"X264" | b"AVC1" | b"DAVC" => "h264",
        // MPEG-2 carried in AVI.
        b"MPG2" | b"MPEG" => "mpeg2",
        _ => return None,
    };
    Some(label.to_owned())
}
296
297/// Walk a movi LIST body pulling out every video sample (chunks whose
298/// fourcc starts with `<stream_prefix>d`). `rec ` sub-LISTs (OpenDML
299/// segmentation) recurse one level. Anything else is skipped.
300fn collect_movi_samples(movi: &[u8], stream_prefix: &str, out: &mut Vec<Vec<u8>>) -> Result<()> {
301    let prefix = stream_prefix.as_bytes();
302    if prefix.len() != 2 {
303        bail!("stream prefix must be 2 chars, got {:?}", stream_prefix);
304    }
305    let mut pos = 0;
306    while pos + 8 <= movi.len() {
307        let fcc = &movi[pos..pos + 4];
308        let size = u32::from_le_bytes([movi[pos + 4], movi[pos + 5], movi[pos + 6], movi[pos + 7]])
309            as usize;
310        let payload_start = pos + 8;
311        let payload_end = payload_start + size;
312        if payload_end > movi.len() {
313            // Truncated tail — stop here rather than bail; gives us
314            // whatever samples we've already picked up.
315            break;
316        }
317        if fcc == b"LIST" && payload_start + 4 <= payload_end {
318            let list_type = &movi[payload_start..payload_start + 4];
319            if list_type == b"rec " {
320                collect_movi_samples(&movi[payload_start + 4..payload_end], stream_prefix, out)?;
321            }
322        } else if fcc.len() == 4 && fcc[0] == prefix[0] && fcc[1] == prefix[1] {
323            // `##dc` = compressed DIB, `##db` = uncompressed DIB — both
324            // are legitimate video sample chunks. `##dd` is an OpenDML
325            // keyframe index we ignore.
326            let kind = fcc[3];
327            if kind == b'c' || kind == b'b' {
328                out.push(movi[payload_start..payload_end].to_vec());
329            }
330        }
331        pos = payload_end + (payload_end & 1);
332    }
333    Ok(())
334}
335
/// Render a fourcc for error messages: bytes that are printable,
/// non-space ASCII are shown as-is, every other byte becomes `.`.
fn ascii(b: &[u8; 4]) -> String {
    let mut text = String::with_capacity(b.len());
    for &byte in b {
        let shown = if byte.is_ascii_graphic() {
            char::from(byte)
        } else {
            '.'
        };
        text.push(shown);
    }
    text
}
347
348// ---------------------------------------------------------------------------
349// AviStreamingDemuxer (Squad streaming-migration-55 P1)
350// ---------------------------------------------------------------------------
351
/// Streaming AVI demuxer. Owns the input bytes and walks the `movi`
/// LIST(s) one chunk at a time. Two backends:
/// - **Legacy single-movi cursor walk** (`Backend::Cursor`): a stack of
///   (pos, end) frames over a single `LIST movi`. `rec ` sub-LISTs push
///   a new frame; we pop on EOF to resume the parent.
/// - **OpenDML index walk** (`Backend::OpenDml`): a precomputed list of
///   `(absolute byte offset, size)` sample chunks assembled from the
///   stream's `indx` superindex + each `ix##` sub-index. `next_video_sample`
///   advances `cursor` and reads `data[offset..offset+size]`.
///
/// Apart from the owned input buffer (`data`), the streaming impl is
/// meant to hold no more than the current sample's bytes regardless of
/// backend.
pub struct AviStreamingDemuxer {
    /// Private copy of the whole input file; every offset stored in
    /// `backend` indexes into this buffer.
    data: Vec<u8>,
    /// Codec label plus stream info computed at construction time.
    header: DemuxHeader,
    /// Sample-location strategy chosen at construction time.
    backend: Backend,
    /// Two-character stream prefix derived from the video stream's
    /// index. e.g. stream 0 → "00". Only used by the cursor backend.
    prefix: [u8; 2],
    /// Frame index — used as a synthetic monotonic PTS in samples-since-
    /// start. AVI doesn't carry per-sample PTS at the container layer.
    next_idx: u64,
    /// Lazily set on first sample: `pixel_format::detect` is one-shot
    /// against the first sample, so we patch `header.info.pixel_format`
    /// in place once and skip the probe thereafter.
    pixel_format_detected: bool,
}
378
/// How `AviStreamingDemuxer` locates the next video sample inside its
/// owned input buffer.
enum Backend {
    /// Walk one or more `LIST movi` records linearly. The Vec is
    /// initialised with one entry per top-level movi LIST in file
    /// order; `rec ` sub-LISTs push additional frames during walk and
    /// pop at EOF. We always operate on the LAST entry (top of stack).
    ///
    /// NOTE(review): the Vec starts in file order but is consumed from
    /// its last entry — confirm the consumer does not walk a multi-movi
    /// file last-segment-first.
    Cursor(Vec<(usize, usize)>),
    /// Precomputed (absolute_offset_of_chunk_data, data_size) list
    /// drawn from the indx → ix## chain. `cursor` indexes into it.
    OpenDml {
        /// One `(offset, size)` pair per sample, in superindex order.
        samples: Vec<(usize, usize)>,
        /// Index of the next entry of `samples` to hand out; starts at 0.
        cursor: usize,
    },
}
392
393pub(crate) fn demux_avi_streaming_init(data: &[u8]) -> Result<AviStreamingDemuxer> {
394    if data.len() < 12 || &data[..4] != b"RIFF" || &data[8..12] != b"AVI " {
395        bail!("not a RIFF/AVI file");
396    }
397    let owned = data.to_vec();
398
399    let mut hdrl: Option<(usize, usize)> = None;
400    let mut movi_lists: Vec<(usize, usize)> = Vec::new();
401    scan_top_level_records(&owned, &mut hdrl, &mut movi_lists);
402
403    let (hdrl_start, hdrl_end) = hdrl.context("AVI: missing hdrl LIST")?;
404    if movi_lists.is_empty() {
405        bail!("AVI: missing movi LIST");
406    }
407
408    let video = find_video_stream(&owned[hdrl_start..hdrl_end])
409        .context("AVI: no video stream found in hdrl")?;
410    let codec = fourcc_to_codec(&video.handler)
411        .or_else(|| fourcc_to_codec(&video.compression))
412        .with_context(|| {
413            format!(
414                "AVI: unsupported video fourcc {:?}/{:?}",
415                ascii(&video.handler),
416                ascii(&video.compression)
417            )
418        })?;
419
420    let stream_idx = video.stream_index;
421    let prefix_str = format!("{:02}", stream_idx);
422    let prefix_bytes = prefix_str.as_bytes();
423    if prefix_bytes.len() != 2 {
424        bail!("AVI: stream index out of range");
425    }
426    let prefix = [prefix_bytes[0], prefix_bytes[1]];
427
428    // OpenDML detection: look for an `indx` superindex inside the
429    // chosen stream's `LIST strl`. Presence triggers the ix##-walking
430    // backend; absence falls back to the legacy cursor walk over each
431    // `LIST movi` LIST in order.
432    let backend =
433        if let Some(ix_refs) = locate_stream_indx(&owned[hdrl_start..hdrl_end], stream_idx) {
434            // Each `qwOffset` in ix_refs is an absolute file offset to an
435            // `ix##` chunk's 8-byte header. Parse each in turn and append
436            // its sample chunks to one big list, in superindex order.
437            let mut samples: Vec<(usize, usize)> = Vec::new();
438            for (ix_off, ix_size) in ix_refs {
439                parse_ix_chunk(&owned, ix_off, ix_size, &prefix, &mut samples);
440            }
441            Backend::OpenDml { samples, cursor: 0 }
442        } else {
443            Backend::Cursor(movi_lists)
444        };
445
446    // total_frames priority for the OpenDML era:
447    //   1. `dmlh.dwTotalFrames` inside `LIST hdrl > LIST odml > dmlh`
448    //      — the spec-mandated 32-bit count for files that may have
449    //      wrapped `avih.dwTotalFrames` (>1 GiB / very long clips).
450    //   2. `avih.dwTotalFrames` for legacy single-RIFF files.
451    //   3. 0 — same "unknown" sentinel as TS (pipeline tolerates).
452    let total_frames = read_dmlh_total_frames(&owned[hdrl_start..hdrl_end])
453        .or_else(|| read_avih_total_frames(&owned[hdrl_start..hdrl_end]))
454        .unwrap_or(0);
455    // Derive duration from total_frames + frame_rate when both are
456    // populated — saves the legacy `samples.len() as f64 / frame_rate`
457    // computation that needed the materialized Vec.
458    let duration = if total_frames > 0 && video.frame_rate > 0.0 {
459        total_frames as f64 / video.frame_rate
460    } else {
461        0.0
462    };
463
464    let info = StreamInfo {
465        codec: codec.clone(),
466        width: video.width,
467        height: video.height,
468        frame_rate: video.frame_rate,
469        duration,
470        pixel_format: PixelFormat::Yuv420p,
471        color_space: ColorSpace::Bt709,
472        color_metadata: Default::default(),
473        total_frames,
474        bitrate: 0,
475    };
476
477    Ok(AviStreamingDemuxer {
478        data: owned,
479        header: DemuxHeader { codec, info },
480        backend,
481        prefix,
482        next_idx: 0,
483        pixel_format_detected: false,
484    })
485}
486
/// Fetch `dwTotalFrames` from the `avih` (MainAVIHeader) chunk found
/// among the top-level records of a `LIST hdrl` body.
///
/// `dwTotalFrames` is the fifth little-endian u32 of the chunk body
/// (bytes 16..20), after `dwMicroSecPerFrame`, `dwMaxBytesPerSec`,
/// `dwPaddingGranularity` and `dwFlags`.
///
/// Returns `None` when there is no `avih` chunk, when any record up to
/// and including it overruns `hdrl`, when the chunk is shorter than 20
/// bytes, or when the stored count is zero.
fn read_avih_total_frames(hdrl: &[u8]) -> Option<u64> {
    let mut offset = 0usize;
    while let Some(head) = hdrl.get(offset..offset + 8) {
        let declared = u32::from_le_bytes([head[4], head[5], head[6], head[7]]) as usize;
        let start = offset + 8;
        let stop = start + declared;
        // A record running past the end of `hdrl` aborts the search.
        let body = hdrl.get(start..stop)?;

        if &head[..4] == b"avih" {
            if declared < 20 {
                return None;
            }
            let frames = u32::from_le_bytes([body[16], body[17], body[18], body[19]]);
            return match frames {
                0 => None,
                count => Some(u64::from(count)),
            };
        }

        // Records are padded to an even boundary.
        offset = stop + (stop & 1);
    }
    None
}
522
/// Fetch `dwTotalFrames` from the OpenDML extended header, i.e. the
/// first little-endian u32 of the `dmlh` chunk inside the first
/// `LIST odml` among the top-level records of a `LIST hdrl` body.
///
/// Only the first `LIST odml` is consulted. Returns `None` when there
/// is no such list, when it holds no `dmlh` of at least 4 bytes, when a
/// record overruns its container, or when the stored count is zero.
fn read_dmlh_total_frames(hdrl: &[u8]) -> Option<u64> {
    // Little-endian u32 at an offset into `hdrl`.
    let le32 = |at: usize| u32::from_le_bytes([hdrl[at], hdrl[at + 1], hdrl[at + 2], hdrl[at + 3]]);

    let mut outer = 0;
    while outer + 8 <= hdrl.len() {
        let outer_len = le32(outer + 4) as usize;
        let list_start = outer + 8;
        let list_end = list_start + outer_len;
        if list_end > hdrl.len() {
            return None;
        }

        let is_odml = &hdrl[outer..outer + 4] == b"LIST"
            && outer_len >= 4
            && &hdrl[list_start..list_start + 4] == b"odml";
        if !is_odml {
            outer = list_end + (list_end & 1);
            continue;
        }

        // Inside the odml LIST: skip the 4-byte list type, then scan
        // its chunks for dmlh.
        let mut inner = list_start + 4;
        while inner + 8 <= list_end {
            let inner_len = le32(inner + 4) as usize;
            let field_at = inner + 8;
            let inner_end = field_at + inner_len;
            if inner_end > list_end {
                return None;
            }
            if &hdrl[inner..inner + 4] == b"dmlh" && inner_len >= 4 {
                return match le32(field_at) {
                    0 => None,
                    count => Some(u64::from(count)),
                };
            }
            inner = inner_end + (inner_end & 1);
        }
        return None;
    }
    None
}
566
567/// Top-level scanner: walks the file picking out `LIST hdrl` (always
568/// in the primary `RIFF AVI ` segment) and every `LIST movi` (which
569/// in OpenDML files is split across the primary `RIFF AVI ` and one
570/// or more `RIFF AVIX` continuation segments at file top level). The
571/// hdrl record is single-occurrence; `movi_lists` accumulates in file
572/// order so the caller can walk segments left-to-right.
573fn scan_top_level_records(
574    data: &[u8],
575    hdrl: &mut Option<(usize, usize)>,
576    movi_lists: &mut Vec<(usize, usize)>,
577) {
578    let mut pos = 0;
579    while pos + 8 <= data.len() {
580        let fcc = &data[pos..pos + 4];
581        let size = u32::from_le_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]])
582            as usize;
583        let payload_start = pos + 8;
584        let claimed_end = payload_start.saturating_add(size);
585        let payload_end = claimed_end.min(data.len());
586
587        if fcc == b"RIFF" && payload_start + 4 <= payload_end {
588            // Form-type follows the size: `AVI ` for the primary
589            // segment, `AVIX` for OpenDML continuation segments. Both
590            // wrap a sequence of LIST/chunk records inside.
591            let form: [u8; 4] = data[payload_start..payload_start + 4].try_into().unwrap();
592            if &form == b"AVI " || &form == b"AVIX" {
593                scan_riff_segment(data, payload_start + 4, payload_end, hdrl, movi_lists);
594            }
595        } else if fcc == b"LIST" && payload_start + 4 <= payload_end {
596            // Some single-RIFF files surface LIST records at file top
597            // level if a stray byte preceded the outer RIFF — handle
598            // defensively. (Real OpenDML files always use RIFF outer.)
599            classify_list(data, payload_start, payload_end, hdrl, movi_lists);
600        }
601        if claimed_end > data.len() {
602            break;
603        }
604        pos = payload_end + (payload_end & 1);
605    }
606}
607
608/// Walk one `RIFF AVI ` / `RIFF AVIX` segment body recording any
609/// `LIST hdrl` / `LIST movi` records found inside.
610fn scan_riff_segment(
611    data: &[u8],
612    body_start: usize,
613    body_end: usize,
614    hdrl: &mut Option<(usize, usize)>,
615    movi_lists: &mut Vec<(usize, usize)>,
616) {
617    let mut p = body_start;
618    while p + 8 <= body_end {
619        let fcc = &data[p..p + 4];
620        let size =
621            u32::from_le_bytes([data[p + 4], data[p + 5], data[p + 6], data[p + 7]]) as usize;
622        let payload_start = p + 8;
623        let claimed_end = payload_start.saturating_add(size);
624        let payload_end = claimed_end.min(body_end);
625        if fcc == b"LIST" && payload_start + 4 <= payload_end {
626            classify_list(data, payload_start, payload_end, hdrl, movi_lists);
627        }
628        if claimed_end > body_end {
629            break;
630        }
631        p = payload_end + (payload_end & 1);
632    }
633}
634
/// Look at the 4-byte list type stored at `payload_start` and remember
/// the list's body range — `(payload_start + 4, payload_end)`, i.e. the
/// bytes after the type tag — when the type is `hdrl` or `movi`.
///
/// Only the first `hdrl` is recorded; later ones are ignored. Every
/// `movi` is appended to `movi_lists`. Other list types are ignored.
fn classify_list(
    data: &[u8],
    payload_start: usize,
    payload_end: usize,
    hdrl: &mut Option<(usize, usize)>,
    movi_lists: &mut Vec<(usize, usize)>,
) {
    let body_range = (payload_start + 4, payload_end);
    match &data[payload_start..payload_start + 4] {
        b"hdrl" => {
            // Keeps an already-recorded hdrl untouched.
            hdrl.get_or_insert(body_range);
        }
        b"movi" => movi_lists.push(body_range),
        _ => {}
    }
}
658
659/// Locate the `indx` superindex chunk inside the `LIST strl` for the
660/// given video stream, and return a list of `(absolute file offset of
661/// each ix## chunk header, ix## chunk body size)` references parsed
662/// from its AVI_INDEX_OF_INDEXES entries. Returns None when:
663/// - the strl LIST doesn't carry an indx (legacy single-`movi` file),
664/// - the indx is the rare AVI_INDEX_OF_CHUNKS form (treated like a
665///   fancy idx1; the cursor walk handles those files correctly), or
666/// - the indx is malformed.
667///
668/// `indx` chunk body layout (24-byte header + N×16-byte entries for
669/// AVI_INDEX_OF_INDEXES, per OpenDML 1.02 §3.7):
670///   wLongsPerEntry     u16   // 4 for index-of-indexes
671///   bIndexSubType      u8    // 0 for index-of-indexes
672///   bIndexType         u8    // 0x00 = AVI_INDEX_OF_INDEXES
673///   nEntriesInUse      u32
674///   dwChunkId          u32   // fcc the entries refer to (e.g. "00dc")
675///   dwReserved[3]      u32×3 // zero
676///   then per entry:
677///     qwOffset         u64   // absolute file offset of an ix## chunk
678///     dwSize           u32   // ix## chunk size (excluding 8-byte hdr)
679///     dwDuration       u32   // sample duration covered by this ix##
680fn locate_stream_indx(hdrl: &[u8], target_stream_idx: u32) -> Option<Vec<(usize, usize)>> {
681    let mut stream_idx: u32 = 0;
682    let mut pos = 0;
683    while pos + 8 <= hdrl.len() {
684        let fcc = &hdrl[pos..pos + 4];
685        let size = u32::from_le_bytes([hdrl[pos + 4], hdrl[pos + 5], hdrl[pos + 6], hdrl[pos + 7]])
686            as usize;
687        let body_start = pos + 8;
688        let body_end = body_start + size;
689        if body_end > hdrl.len() {
690            return None;
691        }
692        if fcc == b"LIST" && size >= 4 && &hdrl[body_start..body_start + 4] == b"strl" {
693            if stream_idx == target_stream_idx {
694                return parse_indx_in_strl(&hdrl[body_start + 4..body_end]);
695            }
696            stream_idx += 1;
697        }
698        pos = body_end + (body_end & 1);
699    }
700    None
701}
702
703fn parse_indx_in_strl(strl: &[u8]) -> Option<Vec<(usize, usize)>> {
704    let mut pos = 0;
705    while pos + 8 <= strl.len() {
706        let fcc = &strl[pos..pos + 4];
707        let size = u32::from_le_bytes([strl[pos + 4], strl[pos + 5], strl[pos + 6], strl[pos + 7]])
708            as usize;
709        let body_start = pos + 8;
710        let body_end = body_start + size;
711        if body_end > strl.len() {
712            return None;
713        }
714        if fcc == b"indx" {
715            return parse_indx_body(&strl[body_start..body_end]);
716        }
717        pos = body_end + (body_end & 1);
718    }
719    None
720}
721
/// Parse the body of an `indx` superindex chunk (index-of-indexes form)
/// into `(absolute file offset of an ix## chunk, ix## chunk size)`
/// pairs, in stored order.
///
/// Layout: a 24-byte header (`wLongsPerEntry` u16, `bIndexSubType` u8,
/// `bIndexType` u8, `nEntriesInUse` u32, `dwChunkId` u32, 12 reserved
/// bytes) followed by 16-byte entries (`qwOffset` u64, `dwSize` u32,
/// `dwDuration` u32). `dwDuration` is not needed and is ignored.
///
/// Returns `None` when the body is shorter than the header, when
/// `bIndexType` is not 0x00 (the index-of-chunks form is left to the
/// cursor backend), or when `wLongsPerEntry` is not 4.
///
/// `nEntriesInUse` comes from the file and is not trusted: at most as
/// many entries as are completely present in `body` are read, and
/// nothing is pre-allocated from the claimed count. Entries whose offset
/// cannot be represented as `usize` on this target are skipped rather
/// than truncated.
fn parse_indx_body(body: &[u8]) -> Option<Vec<(usize, usize)>> {
    const HEADER_LEN: usize = 24;
    const ENTRY_LEN: usize = 16;

    if body.len() < HEADER_LEN {
        return None;
    }
    let longs_per_entry = u16::from_le_bytes([body[0], body[1]]);
    let index_type = body[3];
    // Index-of-chunks form (rare) is handled by the cursor backend, and
    // index-of-indexes entries are always 4 longs (16 bytes) wide.
    if index_type != 0x00 || longs_per_entry != 4 {
        return None;
    }

    let claimed_entries = u32::from_le_bytes([body[4], body[5], body[6], body[7]]);
    let claimed_entries = usize::try_from(claimed_entries).unwrap_or(usize::MAX);

    // `chunks_exact` drops a trailing partial entry; `take` honours a
    // claimed count smaller than what the body could hold.
    let refs: Vec<(usize, usize)> = body[HEADER_LEN..]
        .chunks_exact(ENTRY_LEN)
        .take(claimed_entries)
        .filter_map(|entry| {
            let mut offset_bytes = [0u8; 8];
            offset_bytes.copy_from_slice(&entry[..8]);
            let size_bytes = [entry[8], entry[9], entry[10], entry[11]];
            let offset = usize::try_from(u64::from_le_bytes(offset_bytes)).ok()?;
            let size = usize::try_from(u32::from_le_bytes(size_bytes)).ok()?;
            Some((offset, size))
        })
        .collect();
    Some(refs)
}
763
/// Parse the `ix##` chunk whose 8-byte RIFF header sits at absolute
/// file offset `ix_header_off` and append one
/// `(absolute payload offset, payload size)` pair per index entry to
/// `out`.
///
/// The chunk is ignored (nothing appended) when:
/// - the header or the 24-byte index header does not fit in `data`,
///   including when `ix_header_off` is so large that adding the header
///   length would overflow;
/// - `bIndexType` is not `0x01` or `wLongsPerEntry` is not 2;
/// - `dwChunkId` does not start with `prefix`, or its fourth byte is
///   neither `c` nor `b` (i.e. it is not `<prefix>dc` / `<prefix>db`);
/// - `qwBaseOffset` does not fit in `usize`.
///
/// `_ix_size` (the size recorded in the superindex) is not used; the
/// size is re-read from the chunk's own RIFF header and clamped to
/// `data`.
///
/// `ix##` body layout as read here (24-byte header + N×8-byte entries):
///   wLongsPerEntry     u16   // must be 2
///   bIndexSubType      u8    // not inspected
///   bIndexType         u8    // must be 0x01 (index of chunks)
///   nEntriesInUse      u32
///   dwChunkId          u32   // fcc the entries reference
///   qwBaseOffset       u64   // entries' dwOffset is relative to this
///   dwReserved         u32   // not inspected
///   then per entry:
///     dwOffset         u32   // added to qwBaseOffset
///     dwSize           u32   // high bit is a flag and is masked off
///
/// The recorded offset is `qwBaseOffset + dwOffset` (saturating) and is
/// treated by the caller as the start of the chunk DATA, not of the
/// chunk header. Offsets are not validated against `data` here.
fn parse_ix_chunk(
    data: &[u8],
    ix_header_off: usize,
    _ix_size: usize,
    prefix: &[u8; 2],
    out: &mut Vec<(usize, usize)>,
) {
    const RIFF_HEADER_LEN: usize = 8;
    const IX_HEADER_LEN: usize = 24;
    const IX_ENTRY_LEN: usize = 8;

    // `ix_header_off` comes straight from the (untrusted) superindex, so
    // the addition must be checked rather than allowed to wrap/panic.
    let Some(body_start) = ix_header_off.checked_add(RIFF_HEADER_LEN) else {
        return;
    };
    let Some(riff_header) = data.get(ix_header_off..body_start) else {
        return;
    };
    let body_size = u32::from_le_bytes([
        riff_header[4],
        riff_header[5],
        riff_header[6],
        riff_header[7],
    ]) as usize;
    // Clamp to the buffer so a truncated file still yields what is there.
    let body_end = body_start.saturating_add(body_size).min(data.len());
    let body = &data[body_start..body_end];
    if body.len() < IX_HEADER_LEN {
        return;
    }

    let longs_per_entry = u16::from_le_bytes([body[0], body[1]]);
    let index_type = body[3];
    let n_entries = u32::from_le_bytes([body[4], body[5], body[6], body[7]]) as usize;
    let chunk_id = &body[8..12];
    let qw_base_offset = u64::from_le_bytes([
        body[12], body[13], body[14], body[15], body[16], body[17], body[18], body[19],
    ]);
    // body[20..24] is dwReserved.

    if index_type != 0x01 || longs_per_entry != 2 {
        return; // not an index-of-chunks
    }
    // Only this stream's chunks (`<prefix>dc` / `<prefix>db`).
    if chunk_id[..2] != prefix[..] || !matches!(chunk_id[3], b'c' | b'b') {
        return;
    }
    let Ok(base) = usize::try_from(qw_base_offset) else {
        return; // base offset not addressable on this target
    };

    out.extend(
        body[IX_HEADER_LEN..]
            .chunks_exact(IX_ENTRY_LEN) // a trailing partial entry is ignored
            .take(n_entries)
            .map(|entry| {
                let dw_offset =
                    u32::from_le_bytes([entry[0], entry[1], entry[2], entry[3]]) as usize;
                let dw_size_raw = u32::from_le_bytes([entry[4], entry[5], entry[6], entry[7]]);
                // High bit = "this is NOT a keyframe" flag; mask it off to
                // get the payload size. Keyframes are not tracked here.
                let dw_size = (dw_size_raw & 0x7FFF_FFFF) as usize;
                (base.saturating_add(dw_offset), dw_size)
            }),
    );
}
855
856impl StreamingDemuxer for AviStreamingDemuxer {
857    fn header(&self) -> &DemuxHeader {
858        &self.header
859    }
860
861    fn next_video_sample(&mut self) -> Result<Option<Sample>> {
862        let payload_range = match &mut self.backend {
863            Backend::OpenDml { samples, cursor } => {
864                loop {
865                    if *cursor >= samples.len() {
866                        return Ok(None);
867                    }
868                    let (off, size) = samples[*cursor];
869                    *cursor += 1;
870                    let end = off
871                        .checked_add(size)
872                        .ok_or_else(|| anyhow::anyhow!("AVI: ix## entry overflows usize"))?;
873                    if end > self.data.len() {
874                        // Truncated tail — skip rather than bail; matches
875                        // the cursor-walk's "stop on EOF" posture.
876                        continue;
877                    }
878                    break Some((off, end));
879                }
880            }
881            Backend::Cursor(walk) => {
882                loop {
883                    // Pop empty frames off the walk stack.
884                    while let Some(&(pos, end)) = walk.last() {
885                        if pos + 8 <= end {
886                            break;
887                        }
888                        walk.pop();
889                    }
890                    let Some(&mut (ref mut pos, end)) = walk.last_mut() else {
891                        return Ok(None);
892                    };
893
894                    let fcc: [u8; 4] = self.data[*pos..*pos + 4].try_into()?;
895                    let size = u32::from_le_bytes([
896                        self.data[*pos + 4],
897                        self.data[*pos + 5],
898                        self.data[*pos + 6],
899                        self.data[*pos + 7],
900                    ]) as usize;
901                    let payload_start = *pos + 8;
902                    let payload_end = payload_start + size;
903                    if payload_end > end || payload_end > self.data.len() {
904                        // Truncated — pop this frame and resume parent.
905                        walk.pop();
906                        continue;
907                    }
908
909                    // Advance past this chunk on the cursor for the NEXT call.
910                    *pos = payload_end + (payload_end & 1);
911
912                    if &fcc == b"LIST" && payload_start + 4 <= payload_end {
913                        let list_type: [u8; 4] =
914                            self.data[payload_start..payload_start + 4].try_into()?;
915                        if &list_type == b"rec " {
916                            // Push the inner walk frame and recurse.
917                            walk.push((payload_start + 4, payload_end));
918                            continue;
919                        }
920                        continue; // unknown LIST — skip
921                    }
922
923                    if fcc[0] != self.prefix[0] || fcc[1] != self.prefix[1] {
924                        continue; // wrong stream
925                    }
926                    let kind = fcc[3];
927                    if kind != b'c' && kind != b'b' {
928                        continue; // not a video sample chunk
929                    }
930                    break Some((payload_start, payload_end));
931                }
932            }
933        };
934        let Some((start, end)) = payload_range else {
935            return Ok(None);
936        };
937
938        let pts_ticks = self.next_idx as i64;
939        self.next_idx += 1;
940        let data = self.data[start..end].to_vec();
941        if !self.pixel_format_detected {
942            let detected =
943                codec::pixel_format::detect(&self.header.codec, std::slice::from_ref(&data));
944            self.header.info.pixel_format = detected;
945            self.pixel_format_detected = true;
946        }
947        Ok(Some(Sample {
948            data,
949            pts_ticks,
950            duration_ticks: 0,
951        }))
952    }
953
954    fn audio(&self) -> Option<&AudioTrack> {
955        // AVI audio passthrough is not supported (the legacy path also
956        // returns audio: None) — out of scope for this sprint.
957        None
958    }
959}
960
961#[cfg(test)]
962mod tests {
963    use super::*;
964
965    /// Build a minimal RIFF chunk: little-endian 4-byte size header.
966    fn chunk(fourcc: &[u8; 4], payload: &[u8]) -> Vec<u8> {
967        let mut out = Vec::with_capacity(8 + payload.len());
968        out.extend_from_slice(fourcc);
969        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
970        out.extend_from_slice(payload);
971        if out.len() & 1 == 1 {
972            out.push(0);
973        } // word-align
974        out
975    }
976
977    /// Wrap a payload as `LIST <type> <payload>`.
978    fn list(list_type: &[u8; 4], payload: &[u8]) -> Vec<u8> {
979        let mut body = Vec::with_capacity(4 + payload.len());
980        body.extend_from_slice(list_type);
981        body.extend_from_slice(payload);
982        chunk(b"LIST", &body)
983    }
984
985    /// Emit a strh + strf pair for one video stream using a given fcc.
986    fn video_strl(
987        handler: &[u8; 4],
988        compression: &[u8; 4],
989        w: u32,
990        h: u32,
991        rate: u32,
992        scale: u32,
993    ) -> Vec<u8> {
994        let mut strh = Vec::with_capacity(56);
995        strh.extend_from_slice(b"vids");
996        strh.extend_from_slice(handler);
997        strh.extend_from_slice(&[0u8; 12]); // flags/priority/lang/initial
998        strh.extend_from_slice(&scale.to_le_bytes());
999        strh.extend_from_slice(&rate.to_le_bytes());
1000        strh.extend_from_slice(&[0u8; 24]); // start/length/buf/quality/samplesize/rect
1001        let strh_chunk = chunk(b"strh", &strh);
1002
1003        let mut strf = Vec::with_capacity(40);
1004        strf.extend_from_slice(&40u32.to_le_bytes()); // biSize
1005        strf.extend_from_slice(&(w as i32).to_le_bytes()); // biWidth
1006        strf.extend_from_slice(&(h as i32).to_le_bytes()); // biHeight
1007        strf.extend_from_slice(&1u16.to_le_bytes()); // biPlanes
1008        strf.extend_from_slice(&24u16.to_le_bytes()); // biBitCount
1009        strf.extend_from_slice(compression); // biCompression
1010        strf.extend_from_slice(&[0u8; 20]); // remaining BIH fields
1011        let strf_chunk = chunk(b"strf", &strf);
1012
1013        let mut strl_body = Vec::new();
1014        strl_body.extend_from_slice(&strh_chunk);
1015        strl_body.extend_from_slice(&strf_chunk);
1016        list(b"strl", &strl_body)
1017    }
1018
1019    #[test]
1020    fn demux_minimal_xvid_avi_emits_samples() {
1021        // hdrl LIST: dummy avih + one video strl with XVID fourcc.
1022        let mut hdrl_body = Vec::new();
1023        hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56])); // MainAVIHeader
1024        hdrl_body.extend_from_slice(&video_strl(b"XVID", b"XVID", 320, 240, 30, 1));
1025        let hdrl = list(b"hdrl", &hdrl_body);
1026
1027        // movi LIST: three compressed DIB samples (00dc) of distinct payloads.
1028        let mut movi_body = Vec::new();
1029        movi_body.extend_from_slice(&chunk(b"00dc", b"frame-1-bytes"));
1030        movi_body.extend_from_slice(&chunk(b"01wb", b"audio-ignored"));
1031        movi_body.extend_from_slice(&chunk(b"00dc", b"frame-2"));
1032        movi_body.extend_from_slice(&chunk(b"00dc", b"frame-3-payload"));
1033        let movi = list(b"movi", &movi_body);
1034
1035        // Outer RIFF.
1036        let mut riff_body = Vec::new();
1037        riff_body.extend_from_slice(b"AVI ");
1038        riff_body.extend_from_slice(&hdrl);
1039        riff_body.extend_from_slice(&movi);
1040
1041        let mut file = Vec::with_capacity(8 + riff_body.len());
1042        file.extend_from_slice(b"RIFF");
1043        file.extend_from_slice(&(riff_body.len() as u32).to_le_bytes());
1044        file.extend_from_slice(&riff_body);
1045
1046        let d = demux_avi(&file).expect("demux");
1047        assert_eq!(d.codec, "mpeg4");
1048        assert_eq!(d.info.width, 320);
1049        assert_eq!(d.info.height, 240);
1050        assert_eq!(d.samples.len(), 3);
1051        assert_eq!(d.samples[0], b"frame-1-bytes");
1052        assert_eq!(d.samples[1], b"frame-2");
1053        assert_eq!(d.samples[2], b"frame-3-payload");
1054    }
1055
1056    #[test]
1057    fn demux_rejects_unknown_fourcc() {
1058        let mut hdrl_body = Vec::new();
1059        hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56]));
1060        hdrl_body.extend_from_slice(&video_strl(b"ZZZZ", b"ZZZZ", 100, 100, 30, 1));
1061        let hdrl = list(b"hdrl", &hdrl_body);
1062        let movi = list(b"movi", &chunk(b"00dc", b"x"));
1063        let mut body = Vec::new();
1064        body.extend_from_slice(b"AVI ");
1065        body.extend_from_slice(&hdrl);
1066        body.extend_from_slice(&movi);
1067        let mut file = Vec::new();
1068        file.extend_from_slice(b"RIFF");
1069        file.extend_from_slice(&(body.len() as u32).to_le_bytes());
1070        file.extend_from_slice(&body);
1071        assert!(demux_avi(&file).is_err());
1072    }
1073
1074    #[test]
1075    fn demux_handles_divx_variants() {
1076        for fcc in [b"DIVX", b"DX50", b"DIV3", b"XviD"] {
1077            let mut hdrl_body = Vec::new();
1078            hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56]));
1079            hdrl_body.extend_from_slice(&video_strl(fcc, fcc, 640, 480, 25, 1));
1080            let hdrl = list(b"hdrl", &hdrl_body);
1081            let movi = list(b"movi", &chunk(b"00dc", b"sample"));
1082            let mut body = Vec::new();
1083            body.extend_from_slice(b"AVI ");
1084            body.extend_from_slice(&hdrl);
1085            body.extend_from_slice(&movi);
1086            let mut file = Vec::new();
1087            file.extend_from_slice(b"RIFF");
1088            file.extend_from_slice(&(body.len() as u32).to_le_bytes());
1089            file.extend_from_slice(&body);
1090            let d = demux_avi(&file).expect("should demux");
1091            assert_eq!(d.codec, "mpeg4", "fourcc {:?} did not map to mpeg4", fcc);
1092        }
1093    }
1094
1095    // ----- OpenDML 1.0 super-index fixture tests (Squad-38) -----
1096
1097    /// Build a synthetic OpenDML AVI: 2 movi LISTs each with 3 video
1098    /// chunks (XVID), an indx super-index pointing at 2 ix00 sub-indexes,
1099    /// each ix00 listing the 3 chunks in its movi, and `dmlh` reporting
1100    /// `dwTotalFrames=6`. Returns the assembled file bytes plus the six
1101    /// expected sample payloads in order, so tests can assert offsets +
1102    /// content.
1103    ///
1104    /// Layout (sizes computed bottom-up so absolute offsets work out):
1105    ///   `RIFF AVI ` segment
1106    ///     `LIST hdrl`
1107    ///       `avih` (dwTotalFrames=3 — only counts the first segment;
1108    ///                we expect dmlh's 6 to win)
1109    ///       `LIST strl`
1110    ///         strh (XVID), strf (320×240),
1111    ///         indx superindex pointing at the two ix00 chunks
1112    ///       `LIST odml` { dmlh (dwTotalFrames=6) }
1113    ///     `LIST movi` { 00dc×3 }
1114    ///     ix00 (3 entries pointing into movi#1)
1115    ///   `RIFF AVIX` segment
1116    ///     `LIST movi` { 00dc×3 }
1117    ///     ix00 (3 entries pointing into movi#2)
1118    fn build_opendml_two_movi_six_samples() -> (Vec<u8>, Vec<Vec<u8>>) {
1119        // The six sample payloads — distinct so we can assert ordering.
1120        let payloads: Vec<Vec<u8>> = (0..6)
1121            .map(|i| format!("opendml-frame-{i}").into_bytes())
1122            .collect();
1123
1124        // ----- Inner movi bodies + ix00 stub layout planning -----
1125        // We build movi LISTs first, then plan ix00 chunks from the
1126        // resulting per-chunk offsets, then assemble outer RIFF segments
1127        // so we know the absolute file offsets of each ix00 chunk
1128        // (needed for the indx superindex entries).
1129
1130        // movi#1 body: three 00dc chunks with payloads 0, 1, 2.
1131        // We'll record (offset_into_movi_body_of_chunk_data, size) for each.
1132        let mut movi1_body = Vec::new();
1133        let mut chunk_data_offsets_in_movi1 = Vec::new();
1134        for i in 0..3 {
1135            let cur_off = movi1_body.len();
1136            // Chunk header is 8 bytes; data starts at cur_off + 8.
1137            let c = chunk(b"00dc", &payloads[i]);
1138            movi1_body.extend_from_slice(&c);
1139            chunk_data_offsets_in_movi1.push((cur_off + 8, payloads[i].len()));
1140        }
1141
1142        // movi#2 body: three 00dc chunks with payloads 3, 4, 5.
1143        let mut movi2_body = Vec::new();
1144        let mut chunk_data_offsets_in_movi2 = Vec::new();
1145        for i in 3..6 {
1146            let cur_off = movi2_body.len();
1147            let c = chunk(b"00dc", &payloads[i]);
1148            movi2_body.extend_from_slice(&c);
1149            chunk_data_offsets_in_movi2.push((cur_off + 8, payloads[i].len()));
1150        }
1151
1152        // The movi LIST wraps a 4-byte type ("movi") + body. So the
1153        // body starts +12 from the LIST chunk's start (+8 chunk header
1154        // + 4 type fourcc).
1155        let movi1_chunk = list(b"movi", &movi1_body);
1156        let movi2_chunk = list(b"movi", &movi2_body);
1157
1158        // Build the two ix00 chunks. Each ix## chunk body layout:
1159        //   wLongsPerEntry=2 (u16), bIndexSubType=0 (u8),
1160        //   bIndexType=0x01 (u8), nEntriesInUse=N (u32),
1161        //   dwChunkId="00dc" (u32), qwBaseOffset (u64),
1162        //   dwReserved=0 (u32), then per-entry (dwOffset, dwSize) u32×2.
1163        //
1164        // We point qwBaseOffset at the start of the corresponding movi
1165        // LIST's BODY (i.e. the byte right after `movi` type fourcc).
1166        // dwOffset for each entry is the offset of the chunk DATA from
1167        // qwBaseOffset, i.e. exactly `chunk_data_offsets_in_moviX[i].0`.
1168        let build_ix00 = |entries: &[(usize, usize)], qw_base_offset: u64| -> Vec<u8> {
1169            let mut body = Vec::new();
1170            body.extend_from_slice(&2u16.to_le_bytes()); // wLongsPerEntry
1171            body.push(0); // bIndexSubType
1172            body.push(0x01); // bIndexType=AVI_INDEX_OF_CHUNKS
1173            body.extend_from_slice(&(entries.len() as u32).to_le_bytes()); // nEntriesInUse
1174            body.extend_from_slice(b"00dc"); // dwChunkId
1175            body.extend_from_slice(&qw_base_offset.to_le_bytes()); // qwBaseOffset
1176            body.extend_from_slice(&0u32.to_le_bytes()); // dwReserved
1177            for (data_off, data_size) in entries {
1178                body.extend_from_slice(&(*data_off as u32).to_le_bytes()); // dwOffset
1179                body.extend_from_slice(&(*data_size as u32).to_le_bytes()); // dwSize
1180            }
1181            chunk(b"ix00", &body)
1182        };
1183
1184        // We need the absolute file offsets of the two movi BODIES and
1185        // the two ix00 CHUNK HEADERS to fill the indx superindex.
1186        // Layout of the outer file is:
1187        //   [0..8]      "RIFF" + size32 of the AVI  segment payload
1188        //   [8..12]     "AVI " form type
1189        //   [12..]      LIST hdrl ... (size depends on indx contents
1190        //                — chicken/egg, we resolve below)
1191        //               LIST movi#1 ... (movi1_chunk)
1192        //               ix00#1 ... (ix1)
1193        //   then        "RIFF" + size32 of the AVIX segment payload
1194        //               "AVIX" form type
1195        //               LIST movi#2 ... (movi2_chunk)
1196        //               ix00#2 ... (ix2)
1197        //
1198        // To break the cycle, build hdrl with placeholder indx values
1199        // first, measure the resulting byte sizes, compute final
1200        // offsets, then rewrite the indx body and reassemble.
1201
1202        // Build hdrl first with a PLACEHOLDER indx (zeroed offsets) so
1203        // we know the hdrl size — which doesn't change when we patch
1204        // the placeholder qwOffset values (size stays constant).
1205        let placeholder_indx = build_indx_placeholder();
1206        let hdrl_with_placeholder = build_hdrl(
1207            &placeholder_indx,
1208            /*dmlh_total*/ 6,
1209            /*avih_total*/ 3,
1210        );
1211
1212        // Compute the absolute offsets we need to know AHEAD of writing
1213        // the real indx: positions of movi#1 body, movi#2 body,
1214        // ix00#1 chunk header, ix00#2 chunk header.
1215
1216        // Position 0 of the file = "RIFF" header start. The AVI  segment
1217        // body begins at byte 12 (after RIFF/size/AVI ).
1218        let avi_body_start = 12usize;
1219        let hdrl_offset = avi_body_start; // hdrl is the first record
1220        let hdrl_end = hdrl_offset + hdrl_with_placeholder.len();
1221
1222        let movi1_offset = hdrl_end; // movi LIST chunk header start
1223        // movi LIST body starts at movi1_offset + 8 (LIST hdr) + 4 (type "movi") = +12
1224        let movi1_body_offset = movi1_offset + 12;
1225        let movi1_end = movi1_offset + movi1_chunk.len();
1226
1227        let ix1_offset = movi1_end; // ix00 chunk header for movi#1
1228        // ix00 chunk size doesn't depend on placeholder vs real values —
1229        // build a real one with the right qwBaseOffset to measure its byte
1230        // length (constant for fixed entries).
1231        let ix1_chunk_real = build_ix00(&chunk_data_offsets_in_movi1, movi1_body_offset as u64);
1232        let ix1_end = ix1_offset + ix1_chunk_real.len();
1233
1234        // Now the second `RIFF AVIX` segment starts.
1235        let avix_outer_start = ix1_end;
1236        // RIFF chunk header (8) + form type "AVIX" (4) = 12 bytes before body.
1237        let avix_body_start = avix_outer_start + 12;
1238
1239        let movi2_offset = avix_body_start;
1240        let movi2_body_offset = movi2_offset + 12;
1241        let movi2_end = movi2_offset + movi2_chunk.len();
1242
1243        let ix2_offset = movi2_end;
1244        let ix2_chunk_real = build_ix00(&chunk_data_offsets_in_movi2, movi2_body_offset as u64);
1245
1246        // Real indx superindex pointing at the two ix00 chunks.
1247        let real_indx = build_indx_real(&[
1248            (
1249                ix1_offset as u64,
1250                (ix1_chunk_real.len() - 8) as u32,
1251                /*dur*/ 3,
1252            ),
1253            (
1254                ix2_offset as u64,
1255                (ix2_chunk_real.len() - 8) as u32,
1256                /*dur*/ 3,
1257            ),
1258        ]);
1259        // Sanity: real and placeholder indx must be byte-identical in length.
1260        assert_eq!(
1261            real_indx.len(),
1262            placeholder_indx.len(),
1263            "indx size sanity — placeholder and real must match for offsets to stay valid"
1264        );
1265
1266        let hdrl_real = build_hdrl(&real_indx, 6, 3);
1267        assert_eq!(
1268            hdrl_real.len(),
1269            hdrl_with_placeholder.len(),
1270            "hdrl size sanity — must not depend on indx values, only sizes"
1271        );
1272
1273        // Assemble AVI  segment body (after the RIFF "AVI " 12-byte header).
1274        let mut avi_seg_body = Vec::new();
1275        avi_seg_body.extend_from_slice(b"AVI ");
1276        avi_seg_body.extend_from_slice(&hdrl_real);
1277        avi_seg_body.extend_from_slice(&movi1_chunk);
1278        avi_seg_body.extend_from_slice(&ix1_chunk_real);
1279        // RIFF wrapper for the AVI segment.
1280        let mut file = Vec::new();
1281        file.extend_from_slice(b"RIFF");
1282        file.extend_from_slice(&(avi_seg_body.len() as u32).to_le_bytes());
1283        file.extend_from_slice(&avi_seg_body);
1284
1285        // Assemble AVIX segment body.
1286        let mut avix_seg_body = Vec::new();
1287        avix_seg_body.extend_from_slice(b"AVIX");
1288        avix_seg_body.extend_from_slice(&movi2_chunk);
1289        avix_seg_body.extend_from_slice(&ix2_chunk_real);
1290        file.extend_from_slice(b"RIFF");
1291        file.extend_from_slice(&(avix_seg_body.len() as u32).to_le_bytes());
1292        file.extend_from_slice(&avix_seg_body);
1293
1294        // Sanity: confirm the actual byte positions match what we
1295        // computed (catches any off-by-one in the layout planning).
1296        assert_eq!(
1297            &file[movi1_offset..movi1_offset + 4],
1298            b"LIST",
1299            "movi#1 should start with LIST at the planned offset"
1300        );
1301        assert_eq!(
1302            &file[movi1_body_offset - 4..movi1_body_offset],
1303            b"movi",
1304            "movi#1 type fourcc should sit just before the body"
1305        );
1306        assert_eq!(&file[ix1_offset..ix1_offset + 4], b"ix00");
1307        assert_eq!(&file[movi2_offset..movi2_offset + 4], b"LIST");
1308        assert_eq!(&file[movi2_body_offset - 4..movi2_body_offset], b"movi");
1309        assert_eq!(&file[ix2_offset..ix2_offset + 4], b"ix00");
1310
1311        (file, payloads)
1312    }
1313
1314    /// Build a placeholder indx chunk with the right byte size for two
1315    /// AVI_INDEX_OF_INDEXES entries but zeroed qwOffset / dwSize so we
1316    /// can measure the chunk's overall size before knowing the real
1317    /// offsets of the ix00 chunks it points at.
1318    fn build_indx_placeholder() -> Vec<u8> {
1319        build_indx_real(&[(0, 0, 0), (0, 0, 0)])
1320    }
1321
1322    /// Build a real indx (AVI_INDEX_OF_INDEXES) referring to the given
1323    /// `(qwOffset, dwSize, dwDuration)` triples.
1324    fn build_indx_real(entries: &[(u64, u32, u32)]) -> Vec<u8> {
1325        let mut body = Vec::new();
1326        body.extend_from_slice(&4u16.to_le_bytes()); // wLongsPerEntry=4
1327        body.push(0); // bIndexSubType
1328        body.push(0x00); // bIndexType=AVI_INDEX_OF_INDEXES
1329        body.extend_from_slice(&(entries.len() as u32).to_le_bytes()); // nEntriesInUse
1330        body.extend_from_slice(b"00dc"); // dwChunkId
1331        body.extend_from_slice(&[0u8; 12]); // dwReserved[3]
1332        for (qw_off, dw_size, dw_duration) in entries {
1333            body.extend_from_slice(&qw_off.to_le_bytes());
1334            body.extend_from_slice(&dw_size.to_le_bytes());
1335            body.extend_from_slice(&dw_duration.to_le_bytes());
1336        }
1337        chunk(b"indx", &body)
1338    }
1339
1340    /// Build hdrl LIST containing avih (dwTotalFrames=avih_total),
1341    /// strl with XVID strh+strf+indx, and odml LIST with dmlh
1342    /// (dwTotalFrames=dmlh_total).
1343    fn build_hdrl(indx_chunk: &[u8], dmlh_total: u32, avih_total: u32) -> Vec<u8> {
1344        // avih: u32 dwMicroSecPerFrame, dwMaxBytesPerSec, dwPaddingGranularity,
1345        // dwFlags, dwTotalFrames, then enough zeros to fill 56 bytes.
1346        let mut avih_body = Vec::with_capacity(56);
1347        avih_body.extend_from_slice(&33333u32.to_le_bytes()); // ~30 fps
1348        avih_body.extend_from_slice(&[0u8; 12]); // bytes/sec, padding, flags
1349        avih_body.extend_from_slice(&avih_total.to_le_bytes());
1350        avih_body.extend_from_slice(&[0u8; 32]); // initial frames + remaining 7 fields
1351        let avih_chunk = chunk(b"avih", &avih_body);
1352
1353        // strl with XVID + indx tacked on the end (lives inside strl per
1354        // the OpenDML spec).
1355        let strh_chunk = {
1356            let mut strh = Vec::with_capacity(56);
1357            strh.extend_from_slice(b"vids");
1358            strh.extend_from_slice(b"XVID");
1359            strh.extend_from_slice(&[0u8; 12]);
1360            strh.extend_from_slice(&1u32.to_le_bytes()); // dwScale
1361            strh.extend_from_slice(&30u32.to_le_bytes()); // dwRate
1362            strh.extend_from_slice(&[0u8; 24]);
1363            chunk(b"strh", &strh)
1364        };
1365        let strf_chunk = {
1366            let mut strf = Vec::with_capacity(40);
1367            strf.extend_from_slice(&40u32.to_le_bytes());
1368            strf.extend_from_slice(&320i32.to_le_bytes());
1369            strf.extend_from_slice(&240i32.to_le_bytes());
1370            strf.extend_from_slice(&1u16.to_le_bytes());
1371            strf.extend_from_slice(&24u16.to_le_bytes());
1372            strf.extend_from_slice(b"XVID");
1373            strf.extend_from_slice(&[0u8; 20]);
1374            chunk(b"strf", &strf)
1375        };
1376        let mut strl_body = Vec::new();
1377        strl_body.extend_from_slice(&strh_chunk);
1378        strl_body.extend_from_slice(&strf_chunk);
1379        strl_body.extend_from_slice(indx_chunk);
1380        let strl_chunk = list(b"strl", &strl_body);
1381
1382        // odml LIST: contains dmlh chunk with the total frame count.
1383        let dmlh_chunk = {
1384            let mut body = Vec::new();
1385            body.extend_from_slice(&dmlh_total.to_le_bytes());
1386            // dmlh is allowed to contain more reserved fields; we keep
1387            // it minimal at 4 bytes — every parser only reads the first
1388            // u32.
1389            chunk(b"dmlh", &body)
1390        };
1391        let odml_chunk = list(b"odml", &dmlh_chunk);
1392
1393        let mut hdrl_body = Vec::new();
1394        hdrl_body.extend_from_slice(&avih_chunk);
1395        hdrl_body.extend_from_slice(&strl_chunk);
1396        hdrl_body.extend_from_slice(&odml_chunk);
1397        list(b"hdrl", &hdrl_body)
1398    }
1399
1400    #[test]
1401    fn opendml_streaming_walks_both_movi_lists_in_order() {
1402        let (file, expected) = build_opendml_two_movi_six_samples();
1403        let mut d = demux_avi_streaming_init(&file).expect("OpenDML init");
1404        // dmlh.dwTotalFrames=6 should win over avih.dwTotalFrames=3.
1405        assert_eq!(d.header.info.total_frames, 6);
1406        // Drain — six samples, in superindex (file) order.
1407        let mut got = Vec::new();
1408        while let Some(s) = d.next_video_sample().expect("next") {
1409            got.push(s.data);
1410        }
1411        assert_eq!(
1412            got.len(),
1413            6,
1414            "should pull all six samples across both movi LISTs"
1415        );
1416        for (i, (g, e)) in got.iter().zip(expected.iter()).enumerate() {
1417            assert_eq!(
1418                g, e,
1419                "sample {i} mismatch — OpenDML walk lost ordering or content"
1420            );
1421        }
1422    }
1423
1424    #[test]
1425    fn opendml_legacy_demux_also_walks_both_movi_lists() {
1426        // The legacy `demux_avi` (Vec materialization path) must also
1427        // pick up multi-movi for the bench / fidelity tests that don't
1428        // use streaming.
1429        let (file, expected) = build_opendml_two_movi_six_samples();
1430        let d = demux_avi(&file).expect("legacy demux");
1431        assert_eq!(d.samples.len(), 6);
1432        for (i, (g, e)) in d.samples.iter().zip(expected.iter()).enumerate() {
1433            assert_eq!(g, e, "legacy sample {i} mismatch");
1434        }
1435        assert_eq!(
1436            d.info.total_frames, 6,
1437            "legacy total_frames should honor dmlh"
1438        );
1439    }
1440
1441    #[test]
1442    fn opendml_total_frames_prefers_dmlh_over_avih() {
1443        let (file, _) = build_opendml_two_movi_six_samples();
1444        let d = demux_avi_streaming_init(&file).expect("init");
1445        assert_eq!(
1446            d.header.info.total_frames, 6,
1447            "dmlh.dwTotalFrames (6) must win over avih.dwTotalFrames (3)"
1448        );
1449        // Duration sanity: 6 frames / 30 fps = 0.2s (frame_rate from strh).
1450        assert!(
1451            (d.header.info.duration - 0.2).abs() < 1e-6,
1452            "duration = total_frames / frame_rate, got {}",
1453            d.header.info.duration
1454        );
1455    }
1456
1457    #[test]
1458    fn opendml_picks_indx_path_not_cursor_walk() {
1459        // White-box: the demuxer's backend should be OpenDml when the
1460        // input has an indx superindex. Confirms the dispatch took the
1461        // intended path and we're not accidentally running the cursor
1462        // walk over both movi LISTs (which would also pass the sample-
1463        // count test but defeats the streaming RSS goal for >1 GiB
1464        // files because the cursor walk reads through every byte).
1465        let (file, _) = build_opendml_two_movi_six_samples();
1466        let d = demux_avi_streaming_init(&file).expect("init");
1467        assert!(
1468            matches!(d.backend, Backend::OpenDml { .. }),
1469            "fixture has indx — backend must be OpenDml"
1470        );
1471    }
1472
1473    #[test]
1474    fn legacy_single_movi_without_indx_uses_cursor_backend() {
1475        // Backward-compat: a single-movi AVI without indx must still
1476        // work via the legacy cursor path (Squad-13's contract).
1477        let mut hdrl_body = Vec::new();
1478        hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56]));
1479        hdrl_body.extend_from_slice(&video_strl(b"XVID", b"XVID", 320, 240, 30, 1));
1480        let hdrl = list(b"hdrl", &hdrl_body);
1481        let mut movi_body = Vec::new();
1482        movi_body.extend_from_slice(&chunk(b"00dc", b"f0"));
1483        movi_body.extend_from_slice(&chunk(b"00dc", b"f1"));
1484        let movi = list(b"movi", &movi_body);
1485        let mut riff_body = Vec::new();
1486        riff_body.extend_from_slice(b"AVI ");
1487        riff_body.extend_from_slice(&hdrl);
1488        riff_body.extend_from_slice(&movi);
1489        let mut file = Vec::new();
1490        file.extend_from_slice(b"RIFF");
1491        file.extend_from_slice(&(riff_body.len() as u32).to_le_bytes());
1492        file.extend_from_slice(&riff_body);
1493
1494        let mut d = demux_avi_streaming_init(&file).expect("init");
1495        assert!(
1496            matches!(d.backend, Backend::Cursor(_)),
1497            "no indx → must take cursor backend (legacy path)"
1498        );
1499        let s0 = d.next_video_sample().unwrap().unwrap();
1500        let s1 = d.next_video_sample().unwrap().unwrap();
1501        assert_eq!(s0.data, b"f0");
1502        assert_eq!(s1.data, b"f1");
1503        assert!(d.next_video_sample().unwrap().is_none());
1504    }
1505
1506    #[test]
1507    fn parse_indx_body_decodes_two_index_of_indexes_entries() {
1508        // Direct test of the indx body parser — wire layout regression.
1509        let entries = [
1510            (0xDEAD_BEEFu64, 0x1234u32, 100u32),
1511            (0xCAFE_F00Du64, 0x5678u32, 200u32),
1512        ];
1513        let chunk_bytes = build_indx_real(&entries);
1514        // Skip the 8-byte chunk header to get the body.
1515        let body = &chunk_bytes[8..8 + (chunk_bytes.len() - 8 - (chunk_bytes.len() & 1))];
1516        let parsed = parse_indx_body(body).expect("parse");
1517        assert_eq!(parsed.len(), 2);
1518        assert_eq!(parsed[0], (0xDEAD_BEEFusize, 0x1234usize));
1519        assert_eq!(parsed[1], (0xCAFE_F00Dusize, 0x5678usize));
1520    }
1521
1522    #[test]
1523    fn read_dmlh_total_frames_finds_value_inside_odml_list() {
1524        let dmlh_chunk = {
1525            let mut body = Vec::new();
1526            body.extend_from_slice(&42u32.to_le_bytes());
1527            body.extend_from_slice(&[0u8; 244]); // pad to spec's 248-byte minimum
1528            chunk(b"dmlh", &body)
1529        };
1530        let odml = list(b"odml", &dmlh_chunk);
1531        let mut hdrl_body = Vec::new();
1532        hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56]));
1533        hdrl_body.extend_from_slice(&odml);
1534        // Strip the outer LIST header — read_dmlh_total_frames takes the
1535        // hdrl body (starts after `hdrl` type fourcc).
1536        assert_eq!(read_dmlh_total_frames(&hdrl_body), Some(42));
1537    }
1538
1539    #[test]
1540    fn read_dmlh_total_frames_returns_none_when_odml_absent() {
1541        let mut hdrl_body = Vec::new();
1542        hdrl_body.extend_from_slice(&chunk(b"avih", &[0u8; 56]));
1543        // No odml LIST → fall through to None.
1544        assert_eq!(read_dmlh_total_frames(&hdrl_body), None);
1545    }
1546}