Skip to main content

imferno_core/mxf/
mod.rs

//! SMPTE ST 377-1: Material Exchange Format (MXF) header parser.
//!
//! The parser in this file reads the Header Partition Pack from an MXF file
//! and extracts:
//! - MXF version (major, minor)
//! - Operational Pattern UL (OP1a, OP1b, etc.)
//! - Essence Container ULs (codec container labels)
//!
//! Scope: the functions in this file work at partition-pack level only. They
//! do not parse header metadata sets (Preface, MaterialPackage, essence
//! descriptors) — CPL EssenceDescriptors are the primary source of format
//! info. On native targets the `metadata` submodule covers header-metadata
//! extraction separately.
10
11/// ST 2067-2 §5.3 audio MCA rules applied against the RegXML output
12/// of `mxf::metadata`. WAVE PCM requirement, sample rate / quant-bits
13/// whitelist, channel-label count match, SoundfieldGroupLabel
14/// singleton. Native-only.
15#[cfg(not(target_arch = "wasm32"))]
16pub mod audio_mca;
17pub mod codes;
18/// MXF essence-header validation backed by `smpte-mxf`. Native-only —
19/// the wasm validator never sees MXF binaries (browser callers upload
20/// the XML side of an IMF package), so this module isn't compiled for
21/// `target_arch = "wasm32"`.
22#[cfg(not(target_arch = "wasm32"))]
23pub mod essence;
24/// MXF header-metadata extraction via `regxml` — converts the full
25/// Preface tree (MaterialPackage, descriptors, MCA sub-descriptors)
26/// to RegXML for typed essence-rule application. Native-only.
27#[cfg(not(target_arch = "wasm32"))]
28pub mod metadata;
29/// ST 2067-2 §5.4 timed-text essence rules applied against RegXML.
30/// UCSEncoding=UTF-8, NamespaceURI ∈ IMSC1, MIMEType whitelist.
31/// Native-only.
32#[cfg(not(target_arch = "wasm32"))]
33pub mod timed_text;
34
35use std::io::Read;
36use std::path::Path;
37use thiserror::Error;
38
/// A rational number representing a sample rate (numerator/denominator).
///
/// Used for `SampleRate` fields in MXF essence descriptors (ST 377-1).
/// Distinct from `st2067_3::EditRate` — same representation, different domain.
///
/// Equality and hashing are structural: the two fields are compared as
/// written, with no reduction to lowest terms, so `48000/1` and `96000/2`
/// are different values.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SampleRate {
    /// Numerator of the rate.
    pub numerator: i64,
    /// Denominator of the rate. Not validated here; callers must handle zero.
    pub denominator: i64,
}
48
49// ─── Error ────────────────────────────────────────────────────────────────────
50
51#[derive(Debug, Error)]
52pub enum MxfParseError {
53    #[error("IO error: {0}")]
54    Io(#[from] std::io::Error),
55    #[error("Not a valid MXF file: invalid header partition pack key")]
56    NotMxf,
57    #[error("KLV parse error at byte offset {offset}: {message}")]
58    KlvError { offset: u64, message: String },
59    #[error("Header partition pack missing or too short (got {got} bytes, need ≥ {need})")]
60    PartitionPackTooShort { got: usize, need: usize },
61    /// The partition pack declares more bytes than the parser will read
62    /// (`MAX_PP_BODY = 4096`). Real-world IMF header partition packs are
63    /// well under 1 KiB; lengths above the cap suggest a corrupted file or
64    /// an unexpected MXF dialect — we error rather than silently truncate.
65    #[error("Header partition pack body too large (got {got} bytes, parser cap is {cap})")]
66    PartitionPackTooLarge { got: usize, cap: usize },
67}
68
69type Result<T> = std::result::Result<T, MxfParseError>;
70
71// ─── Public types ─────────────────────────────────────────────────────────────
72
73/// Header-level information extracted from an MXF file.
74///
75/// Populated by parsing the Header Partition Pack KLV triplet only —
76/// no header metadata sets are parsed.
77#[derive(Debug, Clone)]
78pub struct MxfHeaderInfo {
79    /// MXF format version (major, minor) from the partition pack.
80    pub version: (u16, u16),
81    /// Operational Pattern UL as a `urn:smpte:ul:` string.
82    ///
83    /// Common values: `OP1a` = `urn:smpte:ul:060e2b34.04010102.0d010201.01010900`
84    pub operational_pattern: String,
85    /// Essence Container ULs from the partition pack's EssenceContainers batch.
86    pub essence_containers: Vec<String>,
87    /// Descriptor extracted from header metadata (currently always `None`).
88    pub descriptor: Option<MxfDescriptor>,
89}
90
91/// Essence descriptor information from MXF header metadata.
92///
93/// Populated only if header metadata parsing is implemented. Currently always
94/// `None` — CPL EssenceDescriptors are the source of truth.
95#[derive(Debug, Clone)]
96pub enum MxfDescriptor {
97    Video(MxfVideoDescriptor),
98    Audio(MxfAudioDescriptor),
99    TimedText(MxfTimedTextDescriptor),
100}
101
102/// Video essence descriptor from MXF header metadata.
103#[derive(Debug, Clone)]
104pub struct MxfVideoDescriptor {
105    pub stored_width: u32,
106    pub stored_height: u32,
107    pub sample_rate: SampleRate,
108    /// Raw PictureCompression UL string — pass to `VideoCodec::from_ul`.
109    pub picture_compression_ul: Option<String>,
110    /// Raw ColorPrimaries UL string — pass to `ColorPrimaries::from_ul`.
111    pub color_primaries_ul: Option<String>,
112    /// Raw TransferCharacteristic UL string — pass to `TransferCharacteristic::from_ul`.
113    pub transfer_characteristic_ul: Option<String>,
114}
115
116/// Audio essence descriptor from MXF header metadata.
117#[derive(Debug, Clone)]
118pub struct MxfAudioDescriptor {
119    pub sample_rate: SampleRate,
120    pub channel_count: u32,
121    pub quantization_bits: u32,
122}
123
/// Timed text (subtitle/caption) essence properties taken from MXF header
/// metadata.
#[derive(Debug, Clone)]
pub struct MxfTimedTextDescriptor {
    /// Namespace URI of the timed-text document, when one is recorded.
    pub namespace_uri: Option<String>,
}
129
130// ─── Parser ───────────────────────────────────────────────────────────────────
131
132/// Parse header-level information from an MXF file on disk.
133pub fn parse_mxf_header_info(path: &Path) -> Result<MxfHeaderInfo> {
134    let file = std::fs::File::open(path)?;
135    let mut reader = std::io::BufReader::new(file);
136    parse_mxf_header_info_from_reader(&mut reader)
137}
138
139/// Parse header-level information from an MXF byte stream.
140///
141/// Reads only the Header Partition Pack KLV triplet. Does not seek.
142pub fn parse_mxf_header_info_from_reader<R: Read>(reader: &mut R) -> Result<MxfHeaderInfo> {
143    // ── Step 1: Read KLV key (16 bytes) ──────────────────────────────────────
144    let mut key = [0u8; 16];
145    reader.read_exact(&mut key).map_err(|e| {
146        if e.kind() == std::io::ErrorKind::UnexpectedEof {
147            MxfParseError::NotMxf
148        } else {
149            MxfParseError::Io(e)
150        }
151    })?;
152
153    // Verify it is an MXF Header Partition Pack key.
154    // SMPTE ST 377-1 §7.1 — all partition pack keys share the same 12-byte prefix:
155    // 06 0E 2B 34 02 05 01 01 0D 01 02 01
156    // Byte 12 = 01 (header), 02 (body), 03 (footer)
157    // We only accept header partition packs.
158    const MXF_PP_PREFIX: [u8; 12] = [
159        0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01,
160    ];
161    if key[..12] != MXF_PP_PREFIX || key[12] != 0x01 {
162        return Err(MxfParseError::NotMxf);
163    }
164
165    // ── Step 2: BER-decode the length ─────────────────────────────────────────
166    let length = read_ber_length(reader, 16)?;
167
168    // Minimum valid partition pack body is 88 bytes (0 essence containers).
169    const MIN_PP_BODY: u64 = 88;
170    if length < MIN_PP_BODY {
171        return Err(MxfParseError::PartitionPackTooShort {
172            got: length as usize,
173            need: MIN_PP_BODY as usize,
174        });
175    }
176
177    // ── Step 3: Read partition pack body ─────────────────────────────────────
178    // Cap at 4 KiB to avoid absurd allocations on corrupt input. Real IMF
179    // header partition packs are well under 1 KiB, so lengths above the cap
180    // are a signal of a malformed file rather than a legitimate edge case.
181    const MAX_PP_BODY: u64 = 4096;
182    if length > MAX_PP_BODY {
183        return Err(MxfParseError::PartitionPackTooLarge {
184            got: length as usize,
185            cap: MAX_PP_BODY as usize,
186        });
187    }
188    let body_len = length as usize;
189    let mut body = vec![0u8; body_len];
190    reader.read_exact(&mut body)?;
191
192    // ── Step 4: Parse the fixed fields ───────────────────────────────────────
193    // SMPTE ST 377-1:2011, Table 13 — Partition Pack value layout (all big-endian)
194    // Offset  0  MajorVersion       UInt16
195    // Offset  2  MinorVersion       UInt16
196    // Offset  4  KAGSize            UInt32
197    // Offset  8  ThisPartition      UInt64
198    // Offset 16  PreviousPartition  UInt64
199    // Offset 24  FooterPartition    UInt64
200    // Offset 32  HeaderByteCount    UInt64
201    // Offset 40  IndexByteCount     UInt64
202    // Offset 48  IndexSID           UInt32
203    // Offset 52  BodyOffset         UInt64
204    // Offset 60  BodySID            UInt32
205    // Offset 64  OperationalPattern UL[16]
206    // Offset 80  EssenceContainers  batch(count:u32, size:u32, UL[16]...)
207
208    let major_version = u16::from_be_bytes([body[0], body[1]]);
209    let minor_version = u16::from_be_bytes([body[2], body[3]]);
210
211    // OperationalPattern is at offset 64 in the partition pack value.
212    let operational_pattern = format_ul(&body[64..80]);
213
214    // ── Step 5: Parse EssenceContainers batch at offset 80 ───────────────────
215    let mut essence_containers = Vec::new();
216    if body.len() >= 88 {
217        // Batch header: 4-byte count + 4-byte element size
218        let count = u32::from_be_bytes([body[80], body[81], body[82], body[83]]) as usize;
219        let elem_size = u32::from_be_bytes([body[84], body[85], body[86], body[87]]) as usize;
220
221        if elem_size == 16 {
222            let mut offset = 88;
223            for _ in 0..count {
224                if offset + 16 <= body.len() {
225                    essence_containers.push(format_ul(&body[offset..offset + 16]));
226                    offset += 16;
227                } else {
228                    break;
229                }
230            }
231        }
232    }
233
234    Ok(MxfHeaderInfo {
235        version: (major_version, minor_version),
236        operational_pattern,
237        essence_containers,
238        descriptor: None,
239    })
240}
241
242// ─── Helpers ─────────────────────────────────────────────────────────────────
243
244/// Read a BER-encoded length from `reader`.
245/// `key_offset` is used for error messages (byte offset of the key start).
246fn read_ber_length<R: Read>(reader: &mut R, key_offset: u64) -> Result<u64> {
247    let mut first = [0u8; 1];
248    reader.read_exact(&mut first)?;
249    let first = first[0];
250
251    if first < 0x80 {
252        return Ok(first as u64);
253    }
254
255    if first == 0x80 {
256        return Err(MxfParseError::KlvError {
257            offset: key_offset + 16,
258            message: "Indefinite BER length not supported in partition packs".to_string(),
259        });
260    }
261
262    let num_bytes = (first & 0x7F) as usize;
263    if num_bytes > 8 {
264        return Err(MxfParseError::KlvError {
265            offset: key_offset + 16,
266            message: format!("BER length too wide: {num_bytes} bytes"),
267        });
268    }
269
270    let mut buf = [0u8; 8];
271    reader.read_exact(&mut buf[8 - num_bytes..])?;
272    Ok(u64::from_be_bytes(buf))
273}
274
/// Render a UL as `urn:smpte:ul:xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx`.
///
/// The first 16 bytes of `bytes` are written as lowercase hex in four
/// dot-separated groups of four bytes; anything past byte 16 is ignored.
/// A slice shorter than 16 bytes yields the placeholder
/// `(invalid-ul:N-bytes)`, where `N` is the slice length.
fn format_ul(bytes: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    const PREFIX: &str = "urn:smpte:ul:";

    let Some(ul) = bytes.get(..16) else {
        return format!("(invalid-ul:{}-bytes)", bytes.len());
    };

    // Prefix + 32 hex digits + 3 group separators.
    let mut text = String::with_capacity(PREFIX.len() + 32 + 3);
    text.push_str(PREFIX);
    for (index, &byte) in ul.iter().enumerate() {
        if index != 0 && index % 4 == 0 {
            text.push('.');
        }
        text.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        text.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
    }
    text
}
301
302// ─── Tests ────────────────────────────────────────────────────────────────────
303
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// KLV key of a Header Partition Pack (Closed and Complete = 01 02 04 00).
    const HEADER_PP_KEY: [u8; 16] = [
        0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02,
        0x04, 0x00,
    ];

    /// OP1a UL: 060E2B34.04010102.0D010201.01010900
    const OP1A: [u8; 16] = [
        0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x02, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x01,
        0x09, 0x00,
    ];

    /// JPEG 2000 Frame-wrapped container UL
    const J2K_FRAME_WRAPPED: [u8; 16] = [
        0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x0D, 0x0D, 0x01, 0x03, 0x01, 0x02, 0x0C,
        0x01, 0x00,
    ];

    /// Offset of the EssenceContainers batch count within a stream built by
    /// `make_mxf_stream`: key (16) + BER length (1) + batch offset (80).
    const BATCH_COUNT_AT: usize = 16 + 1 + 80;

    /// Helper: build a valid MXF header partition pack byte stream.
    /// Key (16) + short-form BER length (1) + partition pack body
    /// (88 + 16 per essence container).
    fn make_mxf_stream(op_ul: [u8; 16], essence_containers: &[[u8; 16]]) -> Vec<u8> {
        let body_len = 88 + 16 * essence_containers.len();
        assert!(
            body_len < 0x80,
            "helper only emits short-form (single byte) BER lengths"
        );
        let ber_length = u8::try_from(body_len).expect("checked above");
        let batch_count = u32::try_from(essence_containers.len()).expect("checked above");

        let mut stream = Vec::with_capacity(16 + 1 + body_len);
        stream.extend_from_slice(&HEADER_PP_KEY);
        stream.push(ber_length);

        // Partition pack body:
        // MajorVersion = 1
        stream.extend_from_slice(&[0x00, 0x01]);
        // MinorVersion = 3
        stream.extend_from_slice(&[0x00, 0x03]);
        // KAGSize = 512
        stream.extend_from_slice(&[0x00, 0x00, 0x02, 0x00]);
        // ThisPartition = 0
        stream.extend_from_slice(&[0u8; 8]);
        // PreviousPartition = 0
        stream.extend_from_slice(&[0u8; 8]);
        // FooterPartition = 0
        stream.extend_from_slice(&[0u8; 8]);
        // HeaderByteCount = 0
        stream.extend_from_slice(&[0u8; 8]);
        // IndexByteCount = 0
        stream.extend_from_slice(&[0u8; 8]);
        // IndexSID = 0
        stream.extend_from_slice(&[0u8; 4]);
        // BodyOffset = 0
        stream.extend_from_slice(&[0u8; 8]);
        // BodySID = 0
        stream.extend_from_slice(&[0u8; 4]);
        // OperationalPattern UL (body offset 64)
        stream.extend_from_slice(&op_ul);
        // EssenceContainers batch (body offset 80): count, element_size=16, ULs
        stream.extend_from_slice(&batch_count.to_be_bytes());
        stream.extend_from_slice(&16u32.to_be_bytes());
        for ul in essence_containers {
            stream.extend_from_slice(ul);
        }

        assert_eq!(stream.len(), 16 + 1 + body_len);
        stream
    }

    /// Helper: build a minimal valid MXF header partition pack byte stream.
    /// Key (16) + BER length (1) + partition pack body (88).
    fn make_minimal_mxf_stream(op_ul: [u8; 16]) -> Vec<u8> {
        make_mxf_stream(op_ul, &[])
    }

    /// SMPTE ST 377-1 §7.1: a valid MXF file starts with a Header Partition Pack key.
    #[test]
    fn valid_header_partition_pack_parsed() {
        let stream = make_minimal_mxf_stream(OP1A);
        let mut cursor = Cursor::new(stream);
        let info = parse_mxf_header_info_from_reader(&mut cursor).unwrap();

        assert_eq!(info.version, (1, 3));
        assert_eq!(
            info.operational_pattern,
            "urn:smpte:ul:060e2b34.04010102.0d010201.01010900"
        );
        assert!(info.essence_containers.is_empty());
        assert!(info.descriptor.is_none());
    }

    /// SMPTE ST 377-1 §7.1: non-MXF files must be rejected.
    #[test]
    fn non_mxf_data_rejected() {
        let data = vec![0u8; 105];
        let mut cursor = Cursor::new(data);
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut cursor),
            Err(MxfParseError::NotMxf)
        ));
    }

    /// A stream that ends before a full 16-byte key is available is reported
    /// as `NotMxf`, not as an I/O error.
    #[test]
    fn empty_or_truncated_key_rejected() {
        let mut empty = Cursor::new(Vec::<u8>::new());
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut empty),
            Err(MxfParseError::NotMxf)
        ));

        let mut partial = Cursor::new(HEADER_PP_KEY[..10].to_vec());
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut partial),
            Err(MxfParseError::NotMxf)
        ));
    }

    /// Body-type partition pack key (key[12] = 0x02) must be rejected — we
    /// only accept header partition packs.
    #[test]
    fn body_partition_pack_rejected() {
        let mut key = vec![
            0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x02, 0x02,
            0x04, 0x00, // key[12] = 0x02 = body
        ];
        key.extend_from_slice(&[0u8; 89]);
        let mut cursor = Cursor::new(key);
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut cursor),
            Err(MxfParseError::NotMxf)
        ));
    }

    /// A declared body shorter than the 88-byte fixed part is rejected before
    /// any body bytes are read.
    #[test]
    fn undersized_partition_pack_returns_too_short() {
        let mut bytes = HEADER_PP_KEY.to_vec();
        bytes.push(10); // BER short-form length = 10
        let mut cursor = Cursor::new(bytes);
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut cursor),
            Err(MxfParseError::PartitionPackTooShort { got: 10, need: 88 })
        ));
    }

    /// Indefinite-form BER lengths (0x80) are reported as a KLV error.
    #[test]
    fn indefinite_ber_length_rejected() {
        let mut bytes = HEADER_PP_KEY.to_vec();
        bytes.push(0x80);
        let mut cursor = Cursor::new(bytes);
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut cursor),
            Err(MxfParseError::KlvError { .. })
        ));
    }

    /// A stream that ends inside the declared body surfaces the reader's
    /// I/O error.
    #[test]
    fn truncated_body_returns_io_error() {
        let mut stream = make_minimal_mxf_stream(OP1A);
        stream.truncate(16 + 1 + 40);
        let mut cursor = Cursor::new(stream);
        assert!(matches!(
            parse_mxf_header_info_from_reader(&mut cursor),
            Err(MxfParseError::Io(_))
        ));
    }

    /// FIX-4 regression: an oversized partition pack returns
    /// `PartitionPackTooLarge` rather than silently truncating to 4096 bytes.
    /// Pre-fix behaviour was a silent `min(4096)` clamp that could swallow
    /// essence-container data.
    #[test]
    fn oversized_partition_pack_returns_too_large() {
        let mut bytes = HEADER_PP_KEY.to_vec();
        // BER long-form length = 5000 (above the 4096 cap).
        // 4-byte BER encoding: 0x84 followed by 0x00001388 (5000).
        bytes.extend_from_slice(&[0x84, 0x00, 0x00, 0x13, 0x88]);
        // Body padding so read_exact has bytes to consume if the cap check
        // didn't trip — we only ever need to hit the length check, so the
        // body content doesn't matter.
        bytes.extend(std::iter::repeat_n(0u8, 5000));

        let mut cursor = Cursor::new(bytes);
        assert!(
            matches!(
                parse_mxf_header_info_from_reader(&mut cursor),
                Err(MxfParseError::PartitionPackTooLarge {
                    got: 5000,
                    cap: 4096
                })
            ),
            "expected PartitionPackTooLarge {{ got: 5000, cap: 4096 }}"
        );
    }

    /// SMPTE ST 377-1 §7.1: EssenceContainers batch is correctly parsed.
    #[test]
    fn essence_containers_parsed() {
        // BER length = 88 + 16 = 104 (one essence container).
        let stream = make_mxf_stream(OP1A, &[J2K_FRAME_WRAPPED]);
        assert_eq!(stream[16], 104);

        let mut cursor = Cursor::new(stream);
        let info = parse_mxf_header_info_from_reader(&mut cursor).unwrap();

        assert_eq!(
            info.operational_pattern,
            "urn:smpte:ul:060e2b34.04010102.0d010201.01010900"
        );
        assert_eq!(info.essence_containers.len(), 1);
        assert_eq!(
            info.essence_containers[0],
            "urn:smpte:ul:060e2b34.0401010d.0d010301.020c0100"
        );
    }

    /// Pins the parser's current lenient behaviour: when the batch declares
    /// more ULs than the body holds, only the complete ULs present are
    /// returned and no error is raised.
    #[test]
    fn essence_container_count_beyond_body_is_clamped() {
        let mut stream = make_mxf_stream(OP1A, &[J2K_FRAME_WRAPPED]);
        stream[BATCH_COUNT_AT..BATCH_COUNT_AT + 4].copy_from_slice(&2u32.to_be_bytes());

        let mut cursor = Cursor::new(stream);
        let info = parse_mxf_header_info_from_reader(&mut cursor).unwrap();

        assert_eq!(
            info.essence_containers,
            vec!["urn:smpte:ul:060e2b34.0401010d.0d010301.020c0100".to_string()]
        );
    }

    /// Real MXF files from the test corpus parse without error.
    #[test]
    #[ignore = "requires test-data MXF files (large)"]
    fn real_meridian_mxf_parses() {
        let path = std::path::Path::new(
            "../../test-data/MERIDIAN_Netflix_Photon_161006/MERIDIAN_Netflix_Photon_161006_00.mxf",
        );
        if !path.exists() {
            return; // skip if test data not present
        }
        let info = parse_mxf_header_info(path).unwrap();
        assert!(!info.operational_pattern.is_empty());
        println!("OP: {}", info.operational_pattern);
        for ec in &info.essence_containers {
            println!("EC: {ec}");
        }
    }
}