imferno_core/mxf/mod.rs
1//! SMPTE ST 377-1: Material Exchange Format (MXF) header parser.
2//!
3//! Reads the header partition pack from an MXF file and extracts:
4//! - Operational Pattern UL (OP1a, OP1b, etc.)
5//! - Essence Container ULs (codec container labels)
6//!
7//! Scope: partition-pack level only. Full header metadata set parsing
8//! (Preface, MaterialPackage, essence descriptors) is out of scope for
9//! this phase — CPL EssenceDescriptors are the primary source of format info.
10
11/// ST 2067-2 §5.3 audio MCA rules applied against the RegXML output
12/// of `mxf::metadata`. WAVE PCM requirement, sample rate / quant-bits
13/// whitelist, channel-label count match, SoundfieldGroupLabel
14/// singleton. Native-only.
15#[cfg(not(target_arch = "wasm32"))]
16pub mod audio_mca;
17pub mod codes;
18/// MXF essence-header validation backed by `smpte-mxf`. Native-only —
19/// the wasm validator never sees MXF binaries (browser callers upload
20/// the XML side of an IMF package), so this module isn't compiled for
21/// `target_arch = "wasm32"`.
22#[cfg(not(target_arch = "wasm32"))]
23pub mod essence;
24/// MXF header-metadata extraction via `regxml` — converts the full
25/// Preface tree (MaterialPackage, descriptors, MCA sub-descriptors)
26/// to RegXML for typed essence-rule application. Native-only.
27#[cfg(not(target_arch = "wasm32"))]
28pub mod metadata;
29/// ST 2067-2 §5.4 timed-text essence rules applied against RegXML.
30/// UCSEncoding=UTF-8, NamespaceURI ∈ IMSC1, MIMEType whitelist.
31/// Native-only.
32#[cfg(not(target_arch = "wasm32"))]
33pub mod timed_text;
34
35use std::io::Read;
36use std::path::Path;
37use thiserror::Error;
38
/// A rational number representing a sample rate (`numerator / denominator`).
///
/// Used for `SampleRate` fields in MXF essence descriptors (ST 377-1).
/// Distinct from `st2067_3::EditRate` — same representation, different domain.
///
/// The type is a plain pair of integers, so it is `Copy` and usable as a
/// hash-map / hash-set key. Equality is structural: no normalisation is
/// performed, so `48000/1` and `96000/2` compare unequal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SampleRate {
    /// Numerator of the rational rate.
    pub numerator: i64,
    /// Denominator of the rational rate.
    pub denominator: i64,
}
48
49// ─── Error ────────────────────────────────────────────────────────────────────
50
51#[derive(Debug, Error)]
52pub enum MxfParseError {
53 #[error("IO error: {0}")]
54 Io(#[from] std::io::Error),
55 #[error("Not a valid MXF file: invalid header partition pack key")]
56 NotMxf,
57 #[error("KLV parse error at byte offset {offset}: {message}")]
58 KlvError { offset: u64, message: String },
59 #[error("Header partition pack missing or too short (got {got} bytes, need ≥ {need})")]
60 PartitionPackTooShort { got: usize, need: usize },
61 /// The partition pack declares more bytes than the parser will read
62 /// (`MAX_PP_BODY = 4096`). Real-world IMF header partition packs are
63 /// well under 1 KiB; lengths above the cap suggest a corrupted file or
64 /// an unexpected MXF dialect — we error rather than silently truncate.
65 #[error("Header partition pack body too large (got {got} bytes, parser cap is {cap})")]
66 PartitionPackTooLarge { got: usize, cap: usize },
67}
68
69type Result<T> = std::result::Result<T, MxfParseError>;
70
71// ─── Public types ─────────────────────────────────────────────────────────────
72
73/// Header-level information extracted from an MXF file.
74///
75/// Populated by parsing the Header Partition Pack KLV triplet only —
76/// no header metadata sets are parsed.
77#[derive(Debug, Clone)]
78pub struct MxfHeaderInfo {
79 /// MXF format version (major, minor) from the partition pack.
80 pub version: (u16, u16),
81 /// Operational Pattern UL as a `urn:smpte:ul:` string.
82 ///
83 /// Common values: `OP1a` = `urn:smpte:ul:060e2b34.04010102.0d010201.01010900`
84 pub operational_pattern: String,
85 /// Essence Container ULs from the partition pack's EssenceContainers batch.
86 pub essence_containers: Vec<String>,
87 /// Descriptor extracted from header metadata (currently always `None`).
88 pub descriptor: Option<MxfDescriptor>,
89}
90
91/// Essence descriptor information from MXF header metadata.
92///
93/// Populated only if header metadata parsing is implemented. Currently always
94/// `None` — CPL EssenceDescriptors are the source of truth.
95#[derive(Debug, Clone)]
96pub enum MxfDescriptor {
97 Video(MxfVideoDescriptor),
98 Audio(MxfAudioDescriptor),
99 TimedText(MxfTimedTextDescriptor),
100}
101
102/// Video essence descriptor from MXF header metadata.
103#[derive(Debug, Clone)]
104pub struct MxfVideoDescriptor {
105 pub stored_width: u32,
106 pub stored_height: u32,
107 pub sample_rate: SampleRate,
108 /// Raw PictureCompression UL string — pass to `VideoCodec::from_ul`.
109 pub picture_compression_ul: Option<String>,
110 /// Raw ColorPrimaries UL string — pass to `ColorPrimaries::from_ul`.
111 pub color_primaries_ul: Option<String>,
112 /// Raw TransferCharacteristic UL string — pass to `TransferCharacteristic::from_ul`.
113 pub transfer_characteristic_ul: Option<String>,
114}
115
116/// Audio essence descriptor from MXF header metadata.
117#[derive(Debug, Clone)]
118pub struct MxfAudioDescriptor {
119 pub sample_rate: SampleRate,
120 pub channel_count: u32,
121 pub quantization_bits: u32,
122}
123
/// Timed-text (subtitle / caption) essence properties taken from MXF header
/// metadata.
#[derive(Debug, Clone)]
pub struct MxfTimedTextDescriptor {
    /// Namespace URI declared for the timed-text essence, when one is present.
    pub namespace_uri: Option<String>,
}
129
130// ─── Parser ───────────────────────────────────────────────────────────────────
131
132/// Parse header-level information from an MXF file on disk.
133pub fn parse_mxf_header_info(path: &Path) -> Result<MxfHeaderInfo> {
134 let file = std::fs::File::open(path)?;
135 let mut reader = std::io::BufReader::new(file);
136 parse_mxf_header_info_from_reader(&mut reader)
137}
138
139/// Parse header-level information from an MXF byte stream.
140///
141/// Reads only the Header Partition Pack KLV triplet. Does not seek.
142pub fn parse_mxf_header_info_from_reader<R: Read>(reader: &mut R) -> Result<MxfHeaderInfo> {
143 // ── Step 1: Read KLV key (16 bytes) ──────────────────────────────────────
144 let mut key = [0u8; 16];
145 reader.read_exact(&mut key).map_err(|e| {
146 if e.kind() == std::io::ErrorKind::UnexpectedEof {
147 MxfParseError::NotMxf
148 } else {
149 MxfParseError::Io(e)
150 }
151 })?;
152
153 // Verify it is an MXF Header Partition Pack key.
154 // SMPTE ST 377-1 §7.1 — all partition pack keys share the same 12-byte prefix:
155 // 06 0E 2B 34 02 05 01 01 0D 01 02 01
156 // Byte 12 = 01 (header), 02 (body), 03 (footer)
157 // We only accept header partition packs.
158 const MXF_PP_PREFIX: [u8; 12] = [
159 0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01,
160 ];
161 if key[..12] != MXF_PP_PREFIX || key[12] != 0x01 {
162 return Err(MxfParseError::NotMxf);
163 }
164
165 // ── Step 2: BER-decode the length ─────────────────────────────────────────
166 let length = read_ber_length(reader, 16)?;
167
168 // Minimum valid partition pack body is 88 bytes (0 essence containers).
169 const MIN_PP_BODY: u64 = 88;
170 if length < MIN_PP_BODY {
171 return Err(MxfParseError::PartitionPackTooShort {
172 got: length as usize,
173 need: MIN_PP_BODY as usize,
174 });
175 }
176
177 // ── Step 3: Read partition pack body ─────────────────────────────────────
178 // Cap at 4 KiB to avoid absurd allocations on corrupt input. Real IMF
179 // header partition packs are well under 1 KiB, so lengths above the cap
180 // are a signal of a malformed file rather than a legitimate edge case.
181 const MAX_PP_BODY: u64 = 4096;
182 if length > MAX_PP_BODY {
183 return Err(MxfParseError::PartitionPackTooLarge {
184 got: length as usize,
185 cap: MAX_PP_BODY as usize,
186 });
187 }
188 let body_len = length as usize;
189 let mut body = vec![0u8; body_len];
190 reader.read_exact(&mut body)?;
191
192 // ── Step 4: Parse the fixed fields ───────────────────────────────────────
193 // SMPTE ST 377-1:2011, Table 13 — Partition Pack value layout (all big-endian)
194 // Offset 0 MajorVersion UInt16
195 // Offset 2 MinorVersion UInt16
196 // Offset 4 KAGSize UInt32
197 // Offset 8 ThisPartition UInt64
198 // Offset 16 PreviousPartition UInt64
199 // Offset 24 FooterPartition UInt64
200 // Offset 32 HeaderByteCount UInt64
201 // Offset 40 IndexByteCount UInt64
202 // Offset 48 IndexSID UInt32
203 // Offset 52 BodyOffset UInt64
204 // Offset 60 BodySID UInt32
205 // Offset 64 OperationalPattern UL[16]
206 // Offset 80 EssenceContainers batch(count:u32, size:u32, UL[16]...)
207
208 let major_version = u16::from_be_bytes([body[0], body[1]]);
209 let minor_version = u16::from_be_bytes([body[2], body[3]]);
210
211 // OperationalPattern is at offset 64 in the partition pack value.
212 let operational_pattern = format_ul(&body[64..80]);
213
214 // ── Step 5: Parse EssenceContainers batch at offset 80 ───────────────────
215 let mut essence_containers = Vec::new();
216 if body.len() >= 88 {
217 // Batch header: 4-byte count + 4-byte element size
218 let count = u32::from_be_bytes([body[80], body[81], body[82], body[83]]) as usize;
219 let elem_size = u32::from_be_bytes([body[84], body[85], body[86], body[87]]) as usize;
220
221 if elem_size == 16 {
222 let mut offset = 88;
223 for _ in 0..count {
224 if offset + 16 <= body.len() {
225 essence_containers.push(format_ul(&body[offset..offset + 16]));
226 offset += 16;
227 } else {
228 break;
229 }
230 }
231 }
232 }
233
234 Ok(MxfHeaderInfo {
235 version: (major_version, minor_version),
236 operational_pattern,
237 essence_containers,
238 descriptor: None,
239 })
240}
241
242// ─── Helpers ─────────────────────────────────────────────────────────────────
243
244/// Read a BER-encoded length from `reader`.
245/// `key_offset` is used for error messages (byte offset of the key start).
246fn read_ber_length<R: Read>(reader: &mut R, key_offset: u64) -> Result<u64> {
247 let mut first = [0u8; 1];
248 reader.read_exact(&mut first)?;
249 let first = first[0];
250
251 if first < 0x80 {
252 return Ok(first as u64);
253 }
254
255 if first == 0x80 {
256 return Err(MxfParseError::KlvError {
257 offset: key_offset + 16,
258 message: "Indefinite BER length not supported in partition packs".to_string(),
259 });
260 }
261
262 let num_bytes = (first & 0x7F) as usize;
263 if num_bytes > 8 {
264 return Err(MxfParseError::KlvError {
265 offset: key_offset + 16,
266 message: format!("BER length too wide: {num_bytes} bytes"),
267 });
268 }
269
270 let mut buf = [0u8; 8];
271 reader.read_exact(&mut buf[8 - num_bytes..])?;
272 Ok(u64::from_be_bytes(buf))
273}
274
275/// Format 16 raw UL bytes as `urn:smpte:ul:xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx`.
276fn format_ul(bytes: &[u8]) -> String {
277 if bytes.len() < 16 {
278 return format!("(invalid-ul:{}-bytes)", bytes.len());
279 }
280 format!(
281 "urn:smpte:ul:{:02x}{:02x}{:02x}{:02x}.{:02x}{:02x}{:02x}{:02x}.\
282 {:02x}{:02x}{:02x}{:02x}.{:02x}{:02x}{:02x}{:02x}",
283 bytes[0],
284 bytes[1],
285 bytes[2],
286 bytes[3],
287 bytes[4],
288 bytes[5],
289 bytes[6],
290 bytes[7],
291 bytes[8],
292 bytes[9],
293 bytes[10],
294 bytes[11],
295 bytes[12],
296 bytes[13],
297 bytes[14],
298 bytes[15],
299 )
300}
301
302// ─── Tests ────────────────────────────────────────────────────────────────────
303
304#[cfg(test)]
305mod tests {
306 use super::*;
307 use std::io::Cursor;
308
309 /// Helper: build a minimal valid MXF header partition pack byte stream.
310 /// Key (16) + BER length (1) + partition pack body (88).
311 fn make_minimal_mxf_stream(op_ul: [u8; 16]) -> Vec<u8> {
312 let mut stream = Vec::new();
313
314 // Key: Header Partition Pack (Closed and Complete = 01 02 04 00)
315 stream.extend_from_slice(&[
316 0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02,
317 0x04, 0x00,
318 ]);
319 // BER length = 88 (fits in 1 byte)
320 stream.push(88);
321
322 // Partition pack body (88 bytes):
323 // MajorVersion = 1
324 stream.extend_from_slice(&[0x00, 0x01]);
325 // MinorVersion = 3
326 stream.extend_from_slice(&[0x00, 0x03]);
327 // KAGSize = 512
328 stream.extend_from_slice(&[0x00, 0x00, 0x02, 0x00]);
329 // ThisPartition = 0
330 stream.extend_from_slice(&[0u8; 8]);
331 // PreviousPartition = 0
332 stream.extend_from_slice(&[0u8; 8]);
333 // FooterPartition = 0
334 stream.extend_from_slice(&[0u8; 8]);
335 // HeaderByteCount = 0
336 stream.extend_from_slice(&[0u8; 8]);
337 // IndexByteCount = 0
338 stream.extend_from_slice(&[0u8; 8]);
339 // IndexSID = 0
340 stream.extend_from_slice(&[0u8; 4]);
341 // BodyOffset = 0
342 stream.extend_from_slice(&[0u8; 8]);
343 // BodySID = 0
344 stream.extend_from_slice(&[0u8; 4]);
345 // OperationalPattern UL
346 stream.extend_from_slice(&op_ul);
347 // EssenceContainers batch: count=0, element_size=16
348 stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]); // count
349 stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x10]); // element_size
350
351 assert_eq!(stream.len(), 16 + 1 + 88);
352 stream
353 }
354
355 /// SMPTE ST 377-1 §7.1: a valid MXF file starts with a Header Partition Pack key.
356 #[test]
357 fn valid_header_partition_pack_parsed() {
358 // OP1a UL: 060E2B34.04010102.0D010201.01010900
359 let op1a: [u8; 16] = [
360 0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x02, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x01,
361 0x09, 0x00,
362 ];
363 let stream = make_minimal_mxf_stream(op1a);
364 let mut cursor = Cursor::new(stream);
365 let info = parse_mxf_header_info_from_reader(&mut cursor).unwrap();
366
367 assert_eq!(info.version, (1, 3));
368 assert_eq!(
369 info.operational_pattern,
370 "urn:smpte:ul:060e2b34.04010102.0d010201.01010900"
371 );
372 assert!(info.essence_containers.is_empty());
373 assert!(info.descriptor.is_none());
374 }
375
376 /// SMPTE ST 377-1 §7.1: non-MXF files must be rejected.
377 #[test]
378 fn non_mxf_data_rejected() {
379 let data = vec![0u8; 105];
380 let mut cursor = Cursor::new(data);
381 assert!(matches!(
382 parse_mxf_header_info_from_reader(&mut cursor),
383 Err(MxfParseError::NotMxf)
384 ));
385 }
386
387 /// Body-type partition pack key (key[12] = 0x02) must be rejected — we
388 /// only accept header partition packs.
389 #[test]
390 fn body_partition_pack_rejected() {
391 let mut key = vec![
392 0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x02, 0x02,
393 0x04, 0x00, // key[12] = 0x02 = body
394 ];
395 key.extend_from_slice(&[0u8; 89]);
396 let mut cursor = Cursor::new(key);
397 assert!(matches!(
398 parse_mxf_header_info_from_reader(&mut cursor),
399 Err(MxfParseError::NotMxf)
400 ));
401 }
402
403 /// FIX-4 regression: an oversized partition pack returns
404 /// `PartitionPackTooLarge` rather than silently truncating to 4096 bytes.
405 /// Pre-fix behaviour was a silent `min(4096)` clamp that could swallow
406 /// essence-container data.
407 #[test]
408 fn oversized_partition_pack_returns_too_large() {
409 let mut bytes = Vec::new();
410 // Valid header partition pack key.
411 bytes.extend_from_slice(&[
412 0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x01,
413 0x09, 0x00,
414 ]);
415 // BER long-form length = 5000 (above the 4096 cap).
416 // 4-byte BER encoding: 0x84 followed by 0x00001388 (5000).
417 bytes.extend_from_slice(&[0x84, 0x00, 0x00, 0x13, 0x88]);
418 // Body padding so read_exact has bytes to consume if the cap check
419 // didn't trip — we only ever need to hit the length check, so the
420 // body content doesn't matter.
421 bytes.extend(std::iter::repeat_n(0u8, 5000));
422
423 let mut cursor = Cursor::new(bytes);
424 assert!(
425 matches!(
426 parse_mxf_header_info_from_reader(&mut cursor),
427 Err(MxfParseError::PartitionPackTooLarge {
428 got: 5000,
429 cap: 4096
430 })
431 ),
432 "expected PartitionPackTooLarge {{ got: 5000, cap: 4096 }}"
433 );
434 }
435
436 /// SMPTE ST 377-1 §7.1: EssenceContainers batch is correctly parsed.
437 #[test]
438 fn essence_containers_parsed() {
439 let op: [u8; 16] = [
440 0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x02, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x01,
441 0x09, 0x00,
442 ];
443 // JPEG 2000 Frame-wrapped container UL
444 let ec: [u8; 16] = [
445 0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x0D, 0x0D, 0x01, 0x03, 0x01, 0x02, 0x0C,
446 0x01, 0x00,
447 ];
448
449 let mut stream = Vec::new();
450 // Key: Header Partition Pack (Closed and Complete)
451 stream.extend_from_slice(&[
452 0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02,
453 0x04, 0x00,
454 ]);
455 // BER length = 88 + 16 = 104 (one essence container)
456 stream.push(104);
457
458 // Fixed fields (80 bytes): versions + padding to OP
459 stream.extend_from_slice(&[0x00, 0x01]); // MajorVersion = 1
460 stream.extend_from_slice(&[0x00, 0x03]); // MinorVersion = 3
461 stream.extend_from_slice(&[0x00, 0x00, 0x02, 0x00]); // KAGSize
462 stream.extend_from_slice(&[0u8; 8 * 5 + 4 + 8 + 4]); // padding to OP offset
463 stream.extend_from_slice(&op); // OperationalPattern at offset 68
464 // EssenceContainers batch: count=1, element_size=16, then 1 UL
465 stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x01]); // count
466 stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x10]); // element_size
467 stream.extend_from_slice(&ec);
468
469 let mut cursor = Cursor::new(stream);
470 let info = parse_mxf_header_info_from_reader(&mut cursor).unwrap();
471
472 assert_eq!(info.essence_containers.len(), 1);
473 assert_eq!(
474 info.essence_containers[0],
475 "urn:smpte:ul:060e2b34.0401010d.0d010301.020c0100"
476 );
477 }
478
479 /// Real MXF files from the test corpus parse without error.
480 #[test]
481 #[ignore = "requires test-data MXF files (large)"]
482 fn real_meridian_mxf_parses() {
483 let path = std::path::Path::new(
484 "../../test-data/MERIDIAN_Netflix_Photon_161006/MERIDIAN_Netflix_Photon_161006_00.mxf",
485 );
486 if !path.exists() {
487 return; // skip if test data not present
488 }
489 let info = parse_mxf_header_info(path).unwrap();
490 assert!(!info.operational_pattern.is_empty());
491 println!("OP: {}", info.operational_pattern);
492 for ec in &info.essence_containers {
493 println!("EC: {ec}");
494 }
495 }
496}