Skip to main content

imferno_core/package/
mod.rs

1//! IMF Core — Integrated IMF Package Parser
2//!
3//! This module provides a high-level interface for parsing complete IMF packages
4//! by coordinating the individual SMPTE standard parsers.
5//!
6//! ## Key entry points
7//!
8//! - [`build_report`] — parse and validate an IMF package, returning an [`ImfReport`].
9//! - [`format_report`] — render an [`ImfReport`] as a human-readable string.
10
11use crate::assetmap::ImfUuid;
12use crate::cpl::EditRate;
13use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use thiserror::Error;
16
17pub mod codes;
18pub mod report;
19
20#[allow(deprecated)]
21pub use self::report::{
22    build_report, format_report, format_validation_result, FormatOptions, ImfReport, ReportFormat,
23};
24pub use crate::assetmap::{Asset, AssetMap, PackingList, PklAsset, VolumeIndex};
25pub use crate::cpl::{CompositionPlaylist, Resource as CplResource};
26pub use crate::diagnostics::{
27    Category, Location, Severity, ValidationIssue, ValidationProfile, ValidationReport,
28};
29
/// Result of parsing and validating an IMF package.
///
/// This is the primary return type — contains the full parsed package
/// and all validation findings.
///
/// When parsing fails, [`validate`] still returns this struct: `package` is
/// then an empty placeholder and `validation` holds the parse-error issue.
#[derive(Debug, serde::Serialize)]
pub struct ValidationResult {
    /// The fully parsed IMF package.
    pub package: Imferno,
    /// Validation findings (spec violations, warnings, info).
    pub validation: ValidationReport,
}
41
42/// Parse and validate an IMF package in one call.
43///
44/// This is the recommended entry point. Returns the full parsed package
45/// plus all validation findings.
46///
47/// ```no_run
48/// use imferno_core::package::{validate, read_dir, ValidationOptions};
49///
50/// let files = read_dir("./my-imp").unwrap();
51/// let result = validate(files, &ValidationOptions::default());
52/// println!("Compliant: {}", result.validation.is_compliant);
53/// for cpl in result.package.composition_playlists.values() {
54///     println!("CPL: {}", cpl.content_title.text);
55/// }
56/// ```
57pub fn validate(
58    files: std::collections::HashMap<String, String>,
59    options: &ValidationOptions,
60) -> ValidationResult {
61    match Imferno::parse(files) {
62        Ok(package) => {
63            let validation = package.validate(options);
64            ValidationResult {
65                package,
66                validation,
67            }
68        }
69        Err(e) => {
70            let mut validation = ValidationReport::new(ValidationProfile::SMPTE);
71            validation.add(ValidationIssue::new(
72                Severity::Critical,
73                Category::Structure,
74                codes::ImfernoCode::ParseError,
75                format!("Failed to parse IMF package: {e}"),
76            ));
77            // Return a minimal Imferno with what we could parse
78            // For now, this is unreachable in practice since parse only fails
79            // on missing ASSETMAP — but we handle it gracefully.
80            let validation = validation.apply_rules(&options.rules);
81            // Re-parse won't work since files are consumed. Use parse_and_validate fallback.
82            ValidationResult {
83                package: Imferno::empty(),
84                validation,
85            }
86        }
87    }
88}
89
/// Errors that abort loading of an IMF package.
///
/// `Display` messages are generated by `thiserror` from the `#[error]`
/// attributes; variants marked `#[from]` are created automatically by `?`.
#[derive(Error, Debug)]
pub enum ImfError {
    /// Underlying I/O failure (also used to carry storage-backend errors
    /// that `read_dir` converts into `std::io::Error`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// The AssetMap document could not be parsed.
    #[error("AssetMap parse error: {0}")]
    AssetMapParse(#[from] crate::assetmap::AssetMapParseError),

    /// A CPL document could not be parsed.
    #[error("CPL parse error: {0}")]
    CplParse(#[from] crate::cpl::CplParseError),

    /// UUID-related failure; the payload is a free-form description.
    #[error("UUID error: {0}")]
    Uuid(String),

    /// A file the package cannot be loaded without (e.g. `ASSETMAP.xml`)
    /// was not supplied; the payload is the file name.
    #[error("Missing required file: {0}")]
    MissingFile(String),

    /// The package layout is invalid; the payload is a free-form description.
    #[error("Invalid IMF package structure: {0}")]
    InvalidStructure(String),
}
110
/// Convenience alias for results whose error type is [`ImfError`].
pub type Result<T> = std::result::Result<T, ImfError>;
112
/// Errors found during PKL file manifest / hash / cross-reference validation.
///
/// Per SMPTE ST 2067-2 §7-9, the AssetMap, PKL, and CPL must maintain
/// consistent cross-references. These errors describe structural violations.
///
/// Most variants are scoped to a single asset and carry its `uuid`; the last
/// three are scoped to a PKL document or to the package as a whole.
#[derive(Debug)]
pub enum FileValidationError {
    /// PKL lists an asset UUID that has no entry in the AssetMap (ST 2067-2 §7).
    NotInAssetMap {
        uuid: String,
        // Optional file name attached to the PKL asset; shown in messages when present.
        original_file_name: Option<String>,
    },
    /// File expected on disk but not found.
    Missing { uuid: String, path: PathBuf },
    /// File exists but its byte size differs from the PKL declaration.
    SizeMismatch {
        uuid: String,
        path: PathBuf,
        // Size declared by the PKL, in bytes.
        expected: u64,
        // Size of the file actually found, in bytes.
        actual: u64,
    },
    /// Hash digest does not match PKL hash (SHA-1 or SHA-256).
    HashMismatch {
        uuid: String,
        path: PathBuf,
        // Digest declared by the PKL.
        expected: String,
        // Digest computed from the file.
        actual: String,
    },
    /// I/O error while reading the file for hashing.
    Io {
        uuid: String,
        path: PathBuf,
        // Text of the underlying I/O error.
        message: String,
    },
    /// Same asset UUID appears more than once in a single PKL (ST 2067-2 §9).
    DuplicatePklAssetId { uuid: String, pkl_id: String },
    /// PKL document carries a namespace URI we don't recognise. Per
    /// ST 429-8 (the canonical PKL standard) and ST 2067-2:2016, the
    /// only acceptable values are the published namespaces; an
    /// unrecognised one breaks downstream tool interoperability.
    UnknownPklNamespace { pkl_id: String, namespace: String },
    /// The AssetMap has no asset carrying `<PackingList>true</PackingList>`,
    /// so no PKL document is declared. ST 429-9 §6.3 requires the
    /// AssetMap to identify which assets are PKLs.
    AssetMapHasNoPackingList,
    /// A PKL document was parsed but its Id does not appear as a
    /// `PackingList`-flagged asset in the AssetMap. ST 429-9 §6.3
    /// requires every PKL to be declared in the AssetMap.
    PklIdNotInAssetMap { pkl_id: String },
}
162
163impl FileValidationError {
164    pub fn uuid(&self) -> &str {
165        match self {
166            Self::NotInAssetMap { uuid, .. } => uuid,
167            Self::Missing { uuid, .. } => uuid,
168            Self::SizeMismatch { uuid, .. } => uuid,
169            Self::HashMismatch { uuid, .. } => uuid,
170            Self::Io { uuid, .. } => uuid,
171            Self::DuplicatePklAssetId { uuid, .. } => uuid,
172            // The variants below are package-scope, not asset-scope —
173            // the relevant identifier is the PKL document or "—" when
174            // no document is involved at all.
175            Self::UnknownPklNamespace { pkl_id, .. } => pkl_id,
176            Self::PklIdNotInAssetMap { pkl_id } => pkl_id,
177            Self::AssetMapHasNoPackingList => "—",
178        }
179    }
180}
181
182impl std::fmt::Display for FileValidationError {
183    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
184        match self {
185            Self::NotInAssetMap {
186                uuid,
187                original_file_name,
188            } => {
189                write!(
190                    f,
191                    "PKL asset {} ({}) not found in AssetMap",
192                    uuid,
193                    original_file_name.as_deref().unwrap_or("no filename")
194                )
195            }
196            Self::Missing { uuid, path } => {
197                write!(f, "Missing file for {}: {}", uuid, path.display())
198            }
199            Self::SizeMismatch {
200                uuid,
201                path,
202                expected,
203                actual,
204            } => {
205                write!(
206                    f,
207                    "Size mismatch for {} ({}): expected {} bytes, found {}",
208                    uuid,
209                    path.display(),
210                    expected,
211                    actual
212                )
213            }
214            Self::HashMismatch {
215                uuid,
216                path,
217                expected,
218                actual,
219            } => {
220                write!(
221                    f,
222                    "Hash mismatch for {} ({}): expected {}, got {}",
223                    uuid,
224                    path.display(),
225                    expected,
226                    actual
227                )
228            }
229            Self::Io {
230                uuid,
231                path,
232                message,
233            } => {
234                write!(
235                    f,
236                    "IO error reading {} ({}): {}",
237                    uuid,
238                    path.display(),
239                    message
240                )
241            }
242            Self::DuplicatePklAssetId { uuid, pkl_id } => {
243                write!(f, "Duplicate asset UUID {} in PKL {}", uuid, pkl_id)
244            }
245            Self::UnknownPklNamespace { pkl_id, namespace } => {
246                write!(
247                    f,
248                    "PKL {} carries an unrecognised namespace URI: {}",
249                    pkl_id, namespace
250                )
251            }
252            Self::AssetMapHasNoPackingList => {
253                write!(
254                    f,
255                    "AssetMap declares no PKL (no asset has <PackingList>true</PackingList>)"
256                )
257            }
258            Self::PklIdNotInAssetMap { pkl_id } => {
259                write!(
260                    f,
261                    "PKL document {} is not declared as a PackingList asset in the AssetMap",
262                    pkl_id
263                )
264            }
265        }
266    }
267}
268
269impl From<&FileValidationError> for ValidationIssue {
270    fn from(err: &FileValidationError) -> Self {
271        match err {
272            FileValidationError::NotInAssetMap {
273                uuid,
274                original_file_name,
275            } => ValidationIssue::new(
276                Severity::Error,
277                Category::Reference,
278                codes::St2067_2_2020::UnresolvedUuid,
279                format!(
280                    "PKL asset {} ({}) not found in AssetMap",
281                    uuid,
282                    original_file_name.as_deref().unwrap_or("no filename")
283                ),
284            )
285            .with_context("asset_uuid", uuid.clone()),
286            FileValidationError::Missing { uuid, path } => ValidationIssue::new(
287                Severity::Error,
288                Category::Asset,
289                codes::St2067_2_2020::FileNotFound,
290                format!("Missing file for asset {}: {}", uuid, path.display()),
291            )
292            .with_location(Location::new().with_file(path.clone()))
293            .with_context("asset_uuid", uuid.clone()),
294            FileValidationError::SizeMismatch {
295                uuid,
296                path,
297                expected,
298                actual,
299            } => ValidationIssue::new(
300                Severity::Error,
301                Category::Asset,
302                codes::St2067_2_2020::SizeMismatch,
303                format!(
304                    "Size mismatch for asset {} ({}): PKL declares {} bytes, file is {} bytes",
305                    uuid,
306                    path.display(),
307                    expected,
308                    actual
309                ),
310            )
311            .with_location(Location::new().with_file(path.clone()))
312            .with_context("asset_uuid", uuid.clone())
313            .with_context("expected_size", expected.to_string())
314            .with_context("actual_size", actual.to_string()),
315            FileValidationError::HashMismatch {
316                uuid,
317                path,
318                expected,
319                actual,
320            } => ValidationIssue::new(
321                Severity::Critical,
322                Category::Asset,
323                codes::St2067_2_2020::ChecksumMismatch,
324                format!(
325                    "Hash mismatch for asset {} ({}): expected {}, computed {}",
326                    uuid,
327                    path.display(),
328                    expected,
329                    actual
330                ),
331            )
332            .with_location(Location::new().with_file(path.clone()))
333            .with_context("asset_uuid", uuid.clone())
334            .with_suggestion("Re-deliver the asset or re-generate the PKL hash"),
335            FileValidationError::Io {
336                uuid,
337                path,
338                message,
339            } => ValidationIssue::new(
340                Severity::Error,
341                Category::Asset,
342                codes::St2067_2_2020::IoError,
343                format!(
344                    "IO error reading asset {} ({}): {}",
345                    uuid,
346                    path.display(),
347                    message
348                ),
349            )
350            .with_location(Location::new().with_file(path.clone()))
351            .with_context("asset_uuid", uuid.clone()),
352            FileValidationError::DuplicatePklAssetId { uuid, pkl_id } => ValidationIssue::new(
353                Severity::Error,
354                Category::Reference,
355                codes::St2067_2_2020::DuplicateUuid,
356                format!("Duplicate asset UUID {} in PKL {}", uuid, pkl_id),
357            )
358            .with_context("asset_uuid", uuid.clone())
359            .with_context("pkl_id", pkl_id.clone()),
360            FileValidationError::UnknownPklNamespace { pkl_id, namespace } => ValidationIssue::new(
361                Severity::Error,
362                Category::Structure,
363                codes::St2067_2_2020::PklUnknownNamespace,
364                format!(
365                    "PKL {} carries unrecognised namespace '{}' — not in the published \
366                     SMPTE PKL namespace set",
367                    pkl_id, namespace
368                ),
369            )
370            .with_context("pkl_id", pkl_id.clone())
371            .with_context("namespace", namespace.clone()),
372            FileValidationError::AssetMapHasNoPackingList => ValidationIssue::new(
373                Severity::Critical,
374                Category::Structure,
375                codes::St2067_2_2020::AssetMapHasNoPackingList,
376                "AssetMap declares no PKL (no asset has <PackingList>true</PackingList>)"
377                    .to_string(),
378            ),
379            FileValidationError::PklIdNotInAssetMap { pkl_id } => ValidationIssue::new(
380                Severity::Error,
381                Category::Reference,
382                codes::St2067_2_2020::PklIdNotInAssetMap,
383                format!(
384                    "PKL document {} is not declared as a PackingList asset in the AssetMap",
385                    pkl_id
386                ),
387            )
388            .with_context("pkl_id", pkl_id.clone()),
389        }
390    }
391}
392
/// High-level IMF package representation.
///
/// This is the full parsed package — all CPLs, PKLs, AssetMap, SCMs, and
/// cross-references. Serializable to JSON for WASM/NAPI consumers.
///
/// Field names are emitted in camelCase; fields marked `#[serde(skip)]`
/// are internal and never serialized.
#[derive(Debug, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Imferno {
    /// Package root directory. Empty when the package was loaded from a
    /// file map whose keys are not absolute paths.
    #[serde(serialize_with = "serialize_path")]
    pub root_path: PathBuf,

    /// Volume index (VOLINDEX.xml)
    pub volume_index: VolumeIndex,

    /// Load-time VOLINDEX diagnostics (ST 429-9), emitted before all other checks.
    #[serde(skip)]
    pub volindex_issues: Vec<ValidationIssue>,

    /// Load-time parse diagnostics (PKL/CPL/OPL/SCM failures), emitted during validation.
    #[serde(skip)]
    pub(crate) parse_issues: Vec<ValidationIssue>,

    /// Asset map (ASSETMAP.xml)
    pub asset_map: AssetMap,

    /// Parsed Packing Lists mapped by UUID
    pub packing_lists: HashMap<ImfUuid, PackingList>,

    /// Parsed CPL files mapped by UUID
    pub composition_playlists: HashMap<ImfUuid, CompositionPlaylist>,

    /// Raw CPL XML content mapped by UUID (retained for future signature verification).
    #[serde(skip)]
    #[allow(dead_code)]
    pub(crate) cpl_xml_content: HashMap<ImfUuid, String>,

    /// Parsed Output Profile Lists mapped by UUID
    pub output_profile_lists: HashMap<ImfUuid, crate::assetmap::OutputProfileList>,

    /// Parsed Sidecar Composition Maps mapped by UUID (ST 2067-9:2018)
    pub sidecar_composition_maps: HashMap<ImfUuid, crate::scm::SidecarCompositionMap>,

    /// Asset UUID to file path mapping. Paths are serialized as (lossy)
    /// strings.
    #[serde(serialize_with = "serialize_path_map")]
    pub asset_paths: HashMap<ImfUuid, PathBuf>,
}
439
440fn serialize_path<S: serde::Serializer>(path: &Path, s: S) -> std::result::Result<S::Ok, S::Error> {
441    s.serialize_str(&path.to_string_lossy())
442}
443
444fn serialize_path_map<S: serde::Serializer>(
445    map: &HashMap<ImfUuid, PathBuf>,
446    s: S,
447) -> std::result::Result<S::Ok, S::Error> {
448    use serde::ser::SerializeMap;
449    let mut m = s.serialize_map(Some(map.len()))?;
450    for (k, v) in map {
451        m.serialize_entry(k, &v.to_string_lossy().into_owned())?;
452    }
453    m.end()
454}
455
/// Resolve an asset chunk path against the package root, rejecting path traversal.
///
/// Returns `None` when `chunk_path` could address something outside `root`:
///
/// - it contains any component other than a plain name or `.` — that is, a
///   `..`, a root (`/foo`, `\foo`) or a Windows prefix (`C:foo`), all of which
///   would let `Path::join` escape or discard `root`;
/// - the file exists and its canonical location (after following symlinks)
///   is not under the canonical location of `root`.
///
/// Returns the canonical path when the file exists, or the plain
/// `root.join(chunk_path)` when it does not (the lexical check is then the
/// only protection available).
///
/// This prevents a malicious AssetMap from causing file reads outside the
/// intended directory.
fn sanitize_asset_path(root: &Path, chunk_path: &str) -> Option<PathBuf> {
    use std::path::Component;

    let rel = Path::new(chunk_path);

    // Lexical check: allow only ordinary names and `.`. Checking `is_absolute`
    // alone is not enough on Windows, where `\foo` and `C:foo` are not
    // absolute yet still replace the base in `join`.
    let lexically_safe = rel
        .components()
        .all(|part| matches!(part, Component::Normal(_) | Component::CurDir));
    if !lexically_safe {
        return None;
    }

    let joined = root.join(rel);
    match joined.canonicalize() {
        Ok(canonical) => {
            // Compare like with like: `canonical` has symlinks and `..`
            // resolved, so the root must be resolved the same way or a
            // legitimate file under a symlinked root looks like an escape.
            let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
            canonical.starts_with(&canonical_root).then_some(canonical)
        }
        // File doesn't exist yet — the lexical check above is sufficient.
        Err(_) => Some(joined),
    }
}
484
485/// Read all files from a directory into a `HashMap<String, String>`.
486///
487/// XML files are read as strings. Binary files (e.g. MXF) that fail UTF-8
488/// decoding are silently skipped.
489///
490/// Keys are the **absolute** file paths. `from_file_map` (called by `parse`)
491/// derives the package `root_path` from these keys so that file-manifest
492/// and MXF-header validation work correctly on native targets.
493pub fn read_dir(path: impl AsRef<Path>) -> Result<HashMap<String, String>> {
494    use crate::storage::{fs::FsStorage, StorageUri};
495
496    let path = path
497        .as_ref()
498        .canonicalize()
499        .unwrap_or_else(|_| path.as_ref().to_path_buf());
500    let uri = StorageUri::parse(&path.to_string_lossy())
501        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e.to_string()))?;
502    let storage = FsStorage::new();
503
504    read_xml_files(&uri, &storage).map_err(|e| std::io::Error::other(e.to_string()).into())
505}
506
507/// Read all `.xml` files at the given URI through the supplied storage backend.
508///
509/// Returns a map of fully-qualified URIs to file contents. Non-XML entries
510/// and files that fail UTF-8 decoding are skipped (with a warning to stderr
511/// for parity with the legacy `read_dir` behavior).
512///
513/// This is the recommended trait-based entry point. Re-exported as [`read`].
514///
515/// # Example — local filesystem
516///
517/// ```no_run
518/// use imferno_core::package::{read, Imferno};
519/// use imferno_core::storage::{fs::FsStorage, StorageUri};
520///
521/// let uri = StorageUri::parse("/path/to/imp").unwrap();
522/// let storage = FsStorage::new();
523/// let files = read(&uri, &storage).unwrap();
524/// let package = Imferno::parse(files).unwrap();
525/// ```
526///
527/// # Example — S3 (requires the `aws-s3` feature)
528///
529/// ```ignore
530/// use imferno_core::package::{read, Imferno};
531/// use imferno_core::storage::{s3::S3Storage, StorageUri};
532///
533/// let uri = StorageUri::parse("s3://my-bucket/imp/").unwrap();
534/// let storage = S3Storage::from_default().unwrap();
535/// let files = read(&uri, &storage).unwrap();
536/// ```
537pub fn read_xml_files(
538    uri: &crate::storage::StorageUri,
539    storage: &dyn crate::storage::Storage,
540) -> std::result::Result<HashMap<String, String>, crate::storage::StorageError> {
541    let mut files = HashMap::new();
542    for entry in storage.list(uri)? {
543        if !entry.is_file {
544            continue;
545        }
546        if !entry.uri.to_ascii_lowercase().ends_with(".xml") {
547            continue;
548        }
549        let entry_uri = crate::storage::StorageUri::parse(&entry.uri)?;
550        match storage.read_to_string(&entry_uri) {
551            Ok(content) => {
552                files.insert(entry.uri, content);
553            }
554            Err(e) => {
555                eprintln!("Warning: failed to read XML file {}: {}", entry.uri, e);
556            }
557        }
558    }
559    Ok(files)
560}
561
562/// Public alias: `package::read(uri, storage)` — same as [`read_xml_files`].
563pub use self::read_xml_files as read;
564
/// Read all XML files under an S3 prefix into a URI→content map.
///
/// This mirrors [`read_dir`] but reads from an S3 bucket. Only `.xml` files
/// are returned. Keys are `s3://{bucket}/{key}` URIs.
///
/// # Arguments
/// * `client` — An `aws_sdk_s3::Client` (caller controls region, credentials, endpoint).
/// * `bucket` — The S3 bucket name.
/// * `prefix` — The key prefix (e.g. `"packages/my-imf-package/"`). Should end with `/`.
///
/// # Errors
///
/// Every failure (storage setup, URI parsing, listing/reading, or the
/// blocking task failing to join) is reported as [`ImfError::Io`].
#[cfg(feature = "aws-s3")]
pub async fn read_s3(
    client: &aws_sdk_s3::Client,
    bucket: &str,
    prefix: &str,
) -> Result<HashMap<String, String>> {
    use crate::storage::{s3::S3Storage, StorageUri};

    let storage =
        S3Storage::from_client(client.clone()).map_err(|e| std::io::Error::other(e.to_string()))?;
    let uri_str = format!("s3://{bucket}/{prefix}");
    let uri = StorageUri::parse(&uri_str)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e.to_string()))?;

    // The storage trait is synchronous, so the listing and reads run on a
    // blocking task rather than stalling the caller's async runtime. The
    // storage error is converted inside the task so only an `io::Error`
    // crosses the thread boundary.
    let worker = tokio::task::spawn_blocking(move || {
        crate::package::read_xml_files(&uri, &storage)
            .map_err(|e| std::io::Error::other(e.to_string()))
    });

    match worker.await {
        Ok(Ok(files)) => Ok(files),
        Ok(Err(io_error)) => Err(io_error.into()),
        Err(e) => Err(std::io::Error::other(format!("join error: {e}")).into()),
    }
}
598
599impl Imferno {
600    /// Create an empty Imferno (used when parse fails but we still need a struct).
601    fn empty() -> Self {
602        Self {
603            root_path: PathBuf::new(),
604            volume_index: VolumeIndex { index: 1 },
605            volindex_issues: Vec::new(),
606            parse_issues: Vec::new(),
607            asset_map: crate::assetmap::AssetMap {
608                namespace: Default::default(),
609                id: ImfUuid::parse("urn:uuid:00000000-0000-0000-0000-000000000000")
610                    .expect("nil UUID is always valid"),
611                annotation_text: None,
612                creator: None,
613                volume_count: 1,
614                issue_date: "1970-01-01T00:00:00+00:00".into(),
615                issuer: None,
616                asset_list: crate::assetmap::AssetList { assets: Vec::new() },
617            },
618            packing_lists: HashMap::new(),
619            composition_playlists: HashMap::new(),
620            cpl_xml_content: HashMap::new(),
621            output_profile_lists: HashMap::new(),
622            sidecar_composition_maps: HashMap::new(),
623            asset_paths: HashMap::new(),
624        }
625    }
626
    /// Parse an IMF package from an in-memory filename→XML string map (public API).
    ///
    /// This is the parse-only entry point. For parse + validate, use
    /// [`validate()`] instead.
    ///
    /// # Errors
    ///
    /// Propagates any error from `from_file_map`, e.g.
    /// [`ImfError::MissingFile`] when the map has no `ASSETMAP.xml` entry.
    pub fn parse(files: HashMap<String, String>) -> Result<Self> {
        // The map is taken by value but only borrowed by the parser.
        Self::from_file_map(&files)
    }
634
635    /// Parse + validate in one call. Returns a `ValidationReport`.
636    pub fn parse_and_validate(
637        files: HashMap<String, String>,
638        options: &ValidationOptions,
639    ) -> ValidationReport {
640        let package = match Self::parse(files) {
641            Ok(pkg) => pkg,
642            Err(e) => {
643                let mut report = ValidationReport::new(ValidationProfile::SMPTE);
644                report.add(ValidationIssue::new(
645                    Severity::Critical,
646                    Category::Structure,
647                    codes::ImfernoCode::ParseError,
648                    format!("Failed to parse IMF package: {e}"),
649                ));
650                return report.apply_rules(&options.rules);
651            }
652        };
653
654        package.validate(options)
655    }
656
657    /// Validate an already-parsed package. Applies rules from options.
658    pub fn validate(&self, options: &ValidationOptions) -> ValidationReport {
659        use crate::validation::{
660            validate_cpl_with_registry, ConfigurableValidatorRegistry, ValidatorSelection,
661        };
662
663        let selection = ValidatorSelection {
664            core_spec: options.core_spec,
665            app_specs: options.app_specs.clone(),
666            ..Default::default()
667        };
668        let registry = ConfigurableValidatorRegistry::new(selection);
669        #[cfg(not(target_arch = "wasm32"))]
670        let skip_disk = options.skip_disk_checks;
671        #[cfg(target_arch = "wasm32")]
672        let skip_disk = false;
673        let report = self.validate_package_structure_with_cpl_validator(
674            |cpl| validate_cpl_with_registry(cpl, &registry),
675            skip_disk,
676        );
677        let report = self.enrich_cpl_locations(report);
678        let report = report.apply_rules(&options.rules);
679        if options.aggregate_repeats {
680            report.aggregate()
681        } else {
682            report
683        }
684    }
685
686    /// Validate + verify file hashes (expensive — reads every asset).
687    ///
688    /// Hash verification is only available on native targets (not WASM).
689    #[cfg(not(target_arch = "wasm32"))]
690    pub fn validate_hashes(&self, options: &ValidationOptions) -> ValidationReport {
691        use crate::validation::{
692            validate_cpl_with_registry, ConfigurableValidatorRegistry, ValidatorSelection,
693        };
694
695        let selection = ValidatorSelection {
696            core_spec: options.core_spec,
697            app_specs: options.app_specs.clone(),
698            ..Default::default()
699        };
700        let registry = ConfigurableValidatorRegistry::new(selection);
701        let report = self.validate_package_with_hashes_with_cpl_validator(|cpl| {
702            validate_cpl_with_registry(cpl, &registry)
703        });
704        let report = self.enrich_cpl_locations(report);
705        let report = report.apply_rules(&options.rules);
706        if options.aggregate_repeats {
707            report.aggregate()
708        } else {
709            report
710        }
711    }
712
713    /// Enrich all validation issues that have a `cpl_id` with the CPL's
714    /// filename and content title. This is called once after validation so
715    /// that every issue gets human-readable CPL context regardless of where
716    /// it was emitted.
717    fn enrich_cpl_locations(&self, mut report: ValidationReport) -> ValidationReport {
718        // Build lookup: cpl_id -> (filename, title)
719        let mut cpl_info: std::collections::HashMap<ImfUuid, (Option<String>, String)> =
720            std::collections::HashMap::new();
721        for (uuid, cpl) in &self.composition_playlists {
722            let filename = self
723                .asset_paths
724                .get(uuid)
725                .and_then(|p| p.file_name())
726                .and_then(|n| n.to_str())
727                .map(|s| s.to_string());
728            cpl_info.insert(*uuid, (filename, cpl.content_title.to_string()));
729        }
730
731        let enrich = |issue: &mut ValidationIssue| {
732            if let Some(ref cpl_id) = issue.location.cpl_id {
733                if let Some((filename, title)) = cpl_info.get(cpl_id) {
734                    if issue.location.cpl_filename.is_none() {
735                        issue.location.cpl_filename = filename.clone();
736                    }
737                    if issue.location.cpl_title.is_none() {
738                        issue.location.cpl_title = Some(title.clone());
739                    }
740                }
741            }
742        };
743
744        for issue in &mut report.critical {
745            enrich(issue);
746        }
747        for issue in &mut report.errors {
748            enrich(issue);
749        }
750        for issue in &mut report.warnings {
751            enrich(issue);
752        }
753        for issue in &mut report.info {
754            enrich(issue);
755        }
756        report
757    }
758
759    /// Parse an IMF package from an in-memory filename→XML string map.
760    ///
761    /// Intended for WASM and test contexts where no filesystem is available.
762    /// File hashes and existence checks are skipped unless keys are absolute paths
763    /// (as produced by `read_dir`), in which case `root_path` is derived from
764    /// the common parent directory.
765    ///
766    /// Lookup is case-insensitive on the file basename, so both
767    /// `"ASSETMAP.xml"` and `"assetmap.xml"` resolve correctly.
768    fn from_file_map(files: &HashMap<String, String>) -> Result<Self> {
769        // Derive root_path from the keys if they are absolute paths.
770        // `read_dir` produces absolute paths as keys; WASM callers use plain basenames.
771        let root_path: PathBuf = files
772            .keys()
773            .filter_map(|k| {
774                let p = std::path::Path::new(k.as_str());
775                if p.is_absolute() {
776                    p.parent().map(|par| par.to_path_buf())
777                } else {
778                    None
779                }
780            })
781            .next()
782            .unwrap_or_default();
783
784        // Case-insensitive basename lookup helper.
785        let find = |name: &str| -> Option<&str> {
786            let lower = name.to_lowercase();
787            files
788                .iter()
789                .find(|(k, _)| {
790                    let key_basename = std::path::Path::new(k.as_str())
791                        .file_name()
792                        .and_then(|f| f.to_str())
793                        .unwrap_or(k.as_str());
794                    key_basename.to_lowercase() == lower
795                })
796                .map(|(_, v)| v.as_str())
797        };
798
799        // VOLINDEX.xml — optional per ST 429-9; issues collected here, emitted in validation.
800        let mut volindex_issues: Vec<ValidationIssue> = Vec::new();
801        let volume_index = match find("VOLINDEX.xml") {
802            Some(xml) => match crate::assetmap::parse_volindex(xml) {
803                Ok(vi) => vi,
804                Err(e) => {
805                    volindex_issues.push(ValidationIssue::new(
806                        Severity::Error,
807                        Category::Structure,
808                        codes::St429_9_2014::MalformedXml,
809                        format!("VOLINDEX.xml is not well-formed XML: {e}"),
810                    ));
811                    VolumeIndex { index: 1 }
812                }
813            },
814            None => {
815                volindex_issues.push(ValidationIssue::new(
816                    Severity::Info,
817                    Category::Structure,
818                    codes::St429_9_2014::VolindexMissing,
819                    "VOLINDEX.xml is absent; single-volume package assumed",
820                ));
821                VolumeIndex { index: 1 }
822            }
823        };
824
825        // ASSETMAP.xml — required
826        let assetmap_xml = find("ASSETMAP.xml")
827            .ok_or_else(|| ImfError::MissingFile("ASSETMAP.xml".to_string()))?;
828        let asset_map = crate::assetmap::parse_assetmap(assetmap_xml)?;
829
830        // Asset UUID → path mapping.
831        // When root_path is known (native disk load), build absolute paths
832        // with path traversal protection. Otherwise keep relative paths (WASM).
833        let mut asset_paths: HashMap<ImfUuid, PathBuf> = HashMap::new();
834        let mut parse_issues: Vec<ValidationIssue> = Vec::new();
835        for asset in &asset_map.asset_list.assets {
836            for chunk in &asset.chunk_list.chunks {
837                let path = if root_path.as_os_str().is_empty() {
838                    // WASM / in-memory: no filesystem, keep relative path as-is
839                    Some(PathBuf::from(&chunk.path))
840                } else {
841                    sanitize_asset_path(&root_path, &chunk.path)
842                };
843                match path {
844                    Some(p) => {
845                        asset_paths.insert(asset.id, p);
846                    }
847                    None => {
848                        parse_issues.push(ValidationIssue::new(
849                            Severity::Error,
850                            Category::Structure,
851                            codes::ImfernoCode::PathTraversal,
852                            format!(
853                                "Asset '{}' chunk path '{}' escapes the package root directory",
854                                asset.id, chunk.path,
855                            ),
856                        ));
857                    }
858                }
859            }
860        }
861
862        // Parse PKLs
863        let mut packing_lists = HashMap::new();
864        for asset in &asset_map.asset_list.assets {
865            if asset.packing_list == Some(true) {
866                for chunk in &asset.chunk_list.chunks {
867                    let basename = std::path::Path::new(&chunk.path)
868                        .file_name()
869                        .and_then(|f| f.to_str())
870                        .unwrap_or(&chunk.path);
871                    if let Some(pkl_xml) = find(basename) {
872                        match crate::assetmap::parse_pkl(pkl_xml) {
873                            Ok(pkl) => {
874                                packing_lists.insert(asset.id, pkl);
875                            }
876                            Err(e) => {
877                                parse_issues.push(ValidationIssue::new(
878                                    Severity::Error,
879                                    Category::Structure,
880                                    codes::ImfernoCode::PklParseError,
881                                    format!("PKL '{}' parse error: {}", basename, e),
882                                ));
883                            }
884                        }
885                    }
886                }
887            }
888        }
889
890        // Collect XML asset IDs from PKL MIME types
891        let mut xml_asset_ids: std::collections::HashSet<ImfUuid> =
892            std::collections::HashSet::new();
893        for pkl in packing_lists.values() {
894            for pkl_asset in &pkl.asset_list.assets {
895                if pkl_asset.mime_type.is_xml() {
896                    xml_asset_ids.insert(pkl_asset.id);
897                }
898            }
899        }
900
901        // Parse CPLs, OPLs, and SCMs
902        let mut composition_playlists = HashMap::new();
903        let mut cpl_xml_content = HashMap::new();
904        let mut output_profile_lists = HashMap::new();
905        let mut sidecar_composition_maps = HashMap::new();
906        for asset in &asset_map.asset_list.assets {
907            if asset.packing_list == Some(true) {
908                continue;
909            }
910            for chunk in &asset.chunk_list.chunks {
911                if !chunk.path.ends_with(".xml") {
912                    continue;
913                }
914                let is_candidate = if !xml_asset_ids.is_empty() {
915                    xml_asset_ids.contains(&asset.id)
916                } else {
917                    true
918                };
919                if !is_candidate {
920                    continue;
921                }
922
923                let basename = std::path::Path::new(&chunk.path)
924                    .file_name()
925                    .and_then(|f| f.to_str())
926                    .unwrap_or(&chunk.path);
927                if let Some(xml) = find(basename) {
928                    match crate::cpl::parse_cpl(xml) {
929                        Ok(cpl) => {
930                            cpl_xml_content.insert(asset.id, xml.to_string());
931                            composition_playlists.insert(asset.id, cpl);
932                        }
933                        Err(cpl_err) => {
934                            if let Ok(opl) = crate::assetmap::parse_opl(xml) {
935                                output_profile_lists.insert(asset.id, opl);
936                            } else if let Ok(scm) = crate::scm::parse_scm(xml) {
937                                sidecar_composition_maps.insert(asset.id, scm);
938                            } else {
939                                parse_issues.push(ValidationIssue::new(
940                                    Severity::Warning,
941                                    Category::Structure,
942                                    codes::ImfernoCode::XmlAssetParseError,
943                                    format!(
944                                        "XML asset '{}' ({}) could not be parsed as CPL, OPL, or SCM: {}",
945                                        basename, asset.id, cpl_err,
946                                    ),
947                                ));
948                            }
949                        }
950                    }
951                }
952            }
953        }
954
955        Ok(Imferno {
956            root_path,
957            volume_index,
958            volindex_issues,
959            parse_issues,
960            asset_map,
961            packing_lists,
962            composition_playlists,
963            cpl_xml_content,
964            output_profile_lists,
965            sidecar_composition_maps,
966            asset_paths,
967        })
968    }
969
970    /// Get CPL by UUID
971    pub fn get_cpl(&self, uuid: ImfUuid) -> Option<&CompositionPlaylist> {
972        self.composition_playlists.get(&uuid)
973    }
974
975    /// Get CPL by UUID string (convenience for callers with string UUIDs)
976    pub fn get_cpl_str(&self, uuid: &str) -> Option<&CompositionPlaylist> {
977        ImfUuid::parse(uuid)
978            .ok()
979            .and_then(|u| self.composition_playlists.get(&u))
980    }
981
982    /// Get asset file path by UUID
983    pub fn get_asset_path(&self, uuid: ImfUuid) -> Option<&PathBuf> {
984        self.asset_paths.get(&uuid)
985    }
986
987    /// Get asset file path by UUID string (convenience)
988    pub fn get_asset_path_str(&self, uuid: &str) -> Option<&PathBuf> {
989        ImfUuid::parse(uuid)
990            .ok()
991            .and_then(|u| self.asset_paths.get(&u))
992    }
993
994    /// List all CPL UUIDs
995    pub fn list_cpl_uuids(&self) -> Vec<ImfUuid> {
996        self.composition_playlists.keys().copied().collect()
997    }
998
999    /// Get main CPL (first one found)
1000    pub fn get_main_cpl(&self) -> Option<&CompositionPlaylist> {
1001        self.composition_playlists.values().next()
1002    }
1003
1004    /// Return AssetMap assets that have no known relationship to any CPL.
1005    ///
1006    /// An asset is "unreferenced" when it is:
1007    /// - not a CPL, PKL, SCM, or OPL document
1008    /// - not referenced by any CPL Virtual Track's `TrackFileId`
1009    /// - not declared as a sidecar in any SCM
1010    ///
1011    /// These are typically sidecar essences (e.g. Dolby Atmos MXF) delivered
1012    /// without an accompanying SCM document.
1013    pub fn unreferenced_assets(&self) -> Vec<&crate::assetmap::Asset> {
1014        use std::collections::HashSet;
1015
1016        // UUIDs of all document assets we have parsed
1017        let doc_ids: HashSet<ImfUuid> = self
1018            .composition_playlists
1019            .keys()
1020            .chain(self.packing_lists.keys())
1021            .chain(self.sidecar_composition_maps.keys())
1022            .chain(self.output_profile_lists.keys())
1023            .copied()
1024            .collect();
1025
1026        // TrackFileIds referenced by any CPL Virtual Track
1027        let track_file_ids: HashSet<ImfUuid> = self
1028            .composition_playlists
1029            .values()
1030            .flat_map(|cpl| cpl.segment_list.segments.iter())
1031            .flat_map(|seg| {
1032                seg.sequence_list
1033                    .all_sequences()
1034                    .into_iter()
1035                    .flat_map(|seq| {
1036                        seq.resource_list()
1037                            .resources
1038                            .iter()
1039                            .filter_map(|r| r.track_file_id)
1040                    })
1041                    .collect::<Vec<_>>()
1042            })
1043            .collect();
1044
1045        // Asset IDs already declared as SCM sidecars
1046        let scm_declared: HashSet<ImfUuid> = self
1047            .sidecar_composition_maps
1048            .values()
1049            .flat_map(|scm| scm.sidecar_assets.iter().map(|sa| sa.id))
1050            .collect();
1051
1052        self.asset_map
1053            .asset_list
1054            .assets
1055            .iter()
1056            .filter(|a| {
1057                a.packing_list != Some(true)
1058                    && !doc_ids.contains(&a.id)
1059                    && !track_file_ids.contains(&a.id)
1060                    && !scm_declared.contains(&a.id)
1061            })
1062            .collect()
1063    }
1064
1065    /// Emit `ImfernoCode::UnreferencedAsset` info findings into `report` for each
1066    /// asset that has no CPL Virtual Track reference and no SCM declaration.
1067    fn emit_unreferenced_asset_info(&self, report: &mut ValidationReport) {
1068        use crate::diagnostics::codes::ValidationCode as _;
1069        for asset in self.unreferenced_assets() {
1070            let path = asset
1071                .chunk_list
1072                .chunks
1073                .first()
1074                .map(|c| c.path.as_str())
1075                .unwrap_or("(unknown)");
1076            report.add(ValidationIssue::new(
1077                Severity::Info,
1078                Category::Structure,
1079                codes::ImfernoCode::UnreferencedAsset.code(),
1080                format!(
1081                    "Asset '{}' ({}) is present in the AssetMap but not referenced by any CPL \
1082                     Virtual Track and has no SCM declaration",
1083                    path, asset.id,
1084                ),
1085            ));
1086        }
1087    }
1088
1089    /// Emit `ImfernoCode::UnlistedEssence` warnings for any file in the
1090    /// package directory that is not accounted for by the AssetMap, PKL,
1091    /// VOLINDEX, or ASSETMAP itself.
1092    ///
1093    /// Scans the root directory non-recursively.  Skipped on WASM and when
1094    /// `root_path` is unset (in-memory / WASM packages).
1095    #[cfg(not(target_arch = "wasm32"))]
1096    fn emit_unlisted_essence(&self, report: &mut ValidationReport) {
1097        use crate::diagnostics::codes::ValidationCode as _;
1098        if self.root_path.as_os_str().is_empty() {
1099            return;
1100        }
1101
1102        // All filenames listed as chunks in the AssetMap.
1103        let mut known: std::collections::HashSet<String> = self
1104            .asset_map
1105            .asset_list
1106            .assets
1107            .iter()
1108            .flat_map(|a| a.chunk_list.chunks.iter())
1109            .filter_map(|c| {
1110                std::path::Path::new(&c.path)
1111                    .file_name()
1112                    .map(|n| n.to_string_lossy().into_owned())
1113            })
1114            .collect();
1115
1116        // Package infrastructure files are always expected.
1117        known.insert("ASSETMAP.xml".into());
1118        known.insert("VOLINDEX.xml".into());
1119        // Case variants seen in the wild.
1120        known.insert("assetmap.xml".into());
1121        known.insert("volindex.xml".into());
1122        known.insert("ASSETMAP".into());
1123        known.insert("VOLINDEX".into());
1124
1125        let entries = match std::fs::read_dir(&self.root_path) {
1126            Ok(e) => e,
1127            Err(e) => {
1128                report.add(ValidationIssue::new(
1129                    Severity::Info,
1130                    Category::Structure,
1131                    codes::ImfernoCode::ReadDirError,
1132                    format!("Could not scan package directory for unlisted files: {}", e,),
1133                ));
1134                return;
1135            }
1136        };
1137
1138        for entry in entries {
1139            let entry = match entry {
1140                Ok(e) => e,
1141                Err(e) => {
1142                    report.add(ValidationIssue::new(
1143                        Severity::Info,
1144                        Category::Structure,
1145                        codes::ImfernoCode::DirEntryError,
1146                        format!("Could not read directory entry: {}", e),
1147                    ));
1148                    continue;
1149                }
1150            };
1151            let path = entry.path();
1152            // Skip directories
1153            if path.is_dir() {
1154                continue;
1155            }
1156            let filename = match path.file_name() {
1157                Some(n) => n.to_string_lossy().into_owned(),
1158                None => continue,
1159            };
1160            // Case-insensitive match against known files
1161            if known.iter().any(|k| k.eq_ignore_ascii_case(&filename)) {
1162                continue;
1163            }
1164            report.add(ValidationIssue::new(
1165                Severity::Warning,
1166                Category::Structure,
1167                codes::ImfernoCode::UnlistedEssence.code(),
1168                format!(
1169                    "File '{}' is present in the package directory but not listed in the AssetMap",
1170                    filename,
1171                ),
1172            ));
1173        }
1174    }
1175
1176    /// Check package structure, returning an error if any critical or error issues are found.
1177    ///
1178    /// Not currently wired into the public API; retained for potential future use.
1179    #[allow(dead_code)]
1180    pub(crate) fn validate_structure(&self) -> Result<()> {
1181        // Run the comprehensive package structure validation and convert to Result
1182        let report = self.validate_package_structure();
1183        if report.has_critical() || report.has_errors() {
1184            let error_messages: Vec<String> = report
1185                .errors
1186                .iter()
1187                .chain(report.critical.iter())
1188                .map(|i| i.message.clone())
1189                .collect();
1190            return Err(ImfError::InvalidStructure(error_messages.join("; ")));
1191        }
1192        Ok(())
1193    }
1194
1195    /// Validate that every PKL asset exists on disk and has the correct file size.
1196    ///
1197    /// Returns a list of `FileValidationError` describing any mismatches found.
1198    /// An empty vec means the manifest is consistent.
1199    pub fn validate_file_manifest(&self) -> Vec<FileValidationError> {
1200        let mut errors = Vec::new();
1201
1202        // Build UUID → path mapping from AssetMap
1203        let path_map = self.build_asset_path_map();
1204
1205        for pkl in self.packing_lists.values() {
1206            for asset in &pkl.asset_list.assets {
1207                let uuid_str = asset.id.to_string();
1208                match path_map.get(&asset.id) {
1209                    None => {
1210                        errors.push(FileValidationError::NotInAssetMap {
1211                            uuid: uuid_str,
1212                            original_file_name: asset.original_file_name.clone(),
1213                        });
1214                    }
1215                    Some(abs_path) => match std::fs::metadata(abs_path) {
1216                        Err(e) => {
1217                            if e.kind() == std::io::ErrorKind::NotFound {
1218                                errors.push(FileValidationError::Missing {
1219                                    uuid: uuid_str,
1220                                    path: abs_path.clone(),
1221                                });
1222                            } else {
1223                                errors.push(FileValidationError::Io {
1224                                    uuid: uuid_str,
1225                                    path: abs_path.clone(),
1226                                    message: format!("Cannot access file: {}", e),
1227                                });
1228                            }
1229                        }
1230                        Ok(meta) => {
1231                            let actual = meta.len();
1232                            if actual != asset.size {
1233                                errors.push(FileValidationError::SizeMismatch {
1234                                    uuid: uuid_str,
1235                                    path: abs_path.clone(),
1236                                    expected: asset.size,
1237                                    actual,
1238                                });
1239                            }
1240                        }
1241                    },
1242                }
1243            }
1244        }
1245
1246        errors
1247    }
1248
1249    /// Validate file hashes (SHA-1 or SHA-256) for every PKL asset on disk.
1250    ///
1251    /// Per SMPTE ST 2067-2 §9, PKL assets carry hashes with an algorithm
1252    /// specified by the `<HashAlgorithm>` element (defaulting to SHA-1).
1253    ///
1254    /// This is slow — it reads every file. Use `validate_file_manifest` for a
1255    /// fast size-only check. Returns a list of `FileValidationError` describing
1256    /// hash mismatches (missing / size issues are also reported).
1257    pub fn validate_file_hashes(&self) -> Vec<FileValidationError> {
1258        self.validate_file_hashes_with_progress(|_, _, _, _, _| {})
1259    }
1260
1261    /// Like `validate_file_hashes` but calls `on_progress(current, total, filename, bytes_done, bytes_total)`
1262    /// during hashing. Updates both per-file and within-file progress.
1263    pub fn validate_file_hashes_with_progress(
1264        &self,
1265        mut on_progress: impl FnMut(usize, usize, &str, u64, u64),
1266    ) -> Vec<FileValidationError> {
1267        let mut errors = self.validate_file_manifest();
1268        let errored_uuids: std::collections::HashSet<String> =
1269            errors.iter().map(|e| e.uuid().to_string()).collect();
1270
1271        let path_map = self.build_asset_path_map();
1272
1273        // Count total assets to hash
1274        let total: usize = self
1275            .packing_lists
1276            .values()
1277            .map(|pkl| pkl.asset_list.assets.len())
1278            .sum();
1279        let mut current: usize = 0;
1280
1281        for pkl in self.packing_lists.values() {
1282            for asset in &pkl.asset_list.assets {
1283                current += 1;
1284                let uuid_str = asset.id.to_string();
1285                if errored_uuids.contains(&uuid_str) {
1286                    continue;
1287                }
1288                let Some(abs_path) = path_map.get(&asset.id) else {
1289                    continue;
1290                };
1291
1292                let filename = abs_path.file_name().and_then(|n| n.to_str()).unwrap_or("?");
1293                let file_size = std::fs::metadata(abs_path).map(|m| m.len()).unwrap_or(0);
1294                on_progress(current, total, filename, 0, file_size);
1295
1296                match std::fs::File::open(abs_path) {
1297                    Err(e) => {
1298                        errors.push(FileValidationError::Io {
1299                            uuid: uuid_str,
1300                            path: abs_path.clone(),
1301                            message: e.to_string(),
1302                        });
1303                    }
1304                    Ok(file) => {
1305                        use std::io::Read;
1306                        let mut reader = std::io::BufReader::with_capacity(1024 * 1024, file);
1307                        let mut bytes_done: u64 = 0;
1308                        let mut had_error = false;
1309                        let actual_b64 = match asset.hash.algorithm() {
1310                            crate::assetmap::HashAlgorithm::Sha1 => {
1311                                use sha1::Digest;
1312                                let mut hasher = sha1::Sha1::new();
1313                                let mut buf = vec![0u8; 1024 * 1024];
1314                                loop {
1315                                    match reader.read(&mut buf) {
1316                                        Ok(0) => break,
1317                                        Ok(n) => {
1318                                            hasher.update(&buf[..n]);
1319                                            bytes_done += n as u64;
1320                                            on_progress(
1321                                                current, total, filename, bytes_done, file_size,
1322                                            );
1323                                        }
1324                                        Err(e) => {
1325                                            errors.push(FileValidationError::Io {
1326                                                uuid: uuid_str.clone(),
1327                                                path: abs_path.clone(),
1328                                                message: e.to_string(),
1329                                            });
1330                                            had_error = true;
1331                                            break;
1332                                        }
1333                                    }
1334                                }
1335                                base64::Engine::encode(
1336                                    &base64::engine::general_purpose::STANDARD,
1337                                    hasher.finalize(),
1338                                )
1339                            }
1340                            crate::assetmap::HashAlgorithm::Sha256 => {
1341                                use sha2::Digest;
1342                                let mut hasher = sha2::Sha256::new();
1343                                let mut buf = vec![0u8; 1024 * 1024];
1344                                loop {
1345                                    match reader.read(&mut buf) {
1346                                        Ok(0) => break,
1347                                        Ok(n) => {
1348                                            hasher.update(&buf[..n]);
1349                                            bytes_done += n as u64;
1350                                            on_progress(
1351                                                current, total, filename, bytes_done, file_size,
1352                                            );
1353                                        }
1354                                        Err(e) => {
1355                                            errors.push(FileValidationError::Io {
1356                                                uuid: uuid_str.clone(),
1357                                                path: abs_path.clone(),
1358                                                message: e.to_string(),
1359                                            });
1360                                            had_error = true;
1361                                            break;
1362                                        }
1363                                    }
1364                                }
1365                                base64::Engine::encode(
1366                                    &base64::engine::general_purpose::STANDARD,
1367                                    hasher.finalize(),
1368                                )
1369                            }
1370                        };
1371                        if !had_error {
1372                            let expected_b64 = asset.hash.to_base64();
1373                            if actual_b64 != expected_b64 {
1374                                errors.push(FileValidationError::HashMismatch {
1375                                    uuid: uuid_str,
1376                                    path: abs_path.clone(),
1377                                    expected: expected_b64,
1378                                    actual: actual_b64,
1379                                });
1380                            }
1381                        }
1382                    }
1383                }
1384            }
1385        }
1386
1387        errors
1388    }
1389
1390    /// Returns the total number of bytes to be hashed, for progress bar setup.
1391    /// Call this before `validate_file_hashes_parallel` to know the total size.
1392    ///
1393    /// Requires the `tokio` feature.
1394    #[cfg(feature = "tokio")]
1395    pub fn hash_verification_size(&self) -> u64 {
1396        let path_map = self.build_asset_path_map();
1397        self.packing_lists
1398            .values()
1399            .flat_map(|pkl| pkl.asset_list.assets.iter())
1400            .filter_map(|asset| {
1401                path_map
1402                    .get(&asset.id)
1403                    .and_then(|p| std::fs::metadata(p).ok())
1404                    .map(|m| m.len())
1405            })
1406            .sum()
1407    }
1408
1409    /// Per-file progress state for parallel hash verification.
1410    #[cfg(feature = "tokio")]
1411    pub async fn validate_file_hashes_parallel(
1412        &self,
1413        concurrency: usize,
1414        progress: std::sync::Arc<HashProgressTracker>,
1415    ) -> Vec<FileValidationError> {
1416        use std::sync::Arc;
1417
1418        let path_map = self.build_asset_path_map();
1419        let semaphore = Arc::new(tokio::sync::Semaphore::new(concurrency));
1420        let mut handles = Vec::new();
1421
1422        // First pass: validate file manifest (sync, fast)
1423        let manifest_errors = self.validate_file_manifest();
1424        let errored_uuids: std::collections::HashSet<String> = manifest_errors
1425            .iter()
1426            .map(|e| e.uuid().to_string())
1427            .collect();
1428
1429        // Collect assets to hash, sorted smallest first for fast early progress
1430        let mut assets_to_hash: Vec<_> = self
1431            .packing_lists
1432            .values()
1433            .flat_map(|pkl| pkl.asset_list.assets.iter())
1434            .filter(|asset| !errored_uuids.contains(&asset.id.to_string()))
1435            .filter(|asset| path_map.contains_key(&asset.id))
1436            .collect();
1437        assets_to_hash.sort_by_key(|a| a.size);
1438
1439        // Register and spawn hash tasks
1440        for asset in assets_to_hash {
1441            let abs_path = path_map.get(&asset.id).unwrap();
1442
1443            let filename = abs_path
1444                .file_name()
1445                .and_then(|n| n.to_str())
1446                .unwrap_or("?")
1447                .to_string();
1448            let file_size = asset.size;
1449            let (bytes_counter, status_flag) = progress.register(filename, file_size);
1450
1451            let uuid_str = asset.id.to_string();
1452            let abs_path = abs_path.clone();
1453            let expected_b64 = asset.hash.to_base64();
1454            let algorithm = asset.hash.algorithm();
1455            let sem = semaphore.clone();
1456
1457            let err_uuid = uuid_str.clone();
1458            let err_path = abs_path.clone();
1459            handles.push(tokio::spawn(async move {
1460                let _permit = sem.acquire().await.unwrap();
1461                status_flag.store(1, std::sync::atomic::Ordering::Relaxed); // Hashing
1462                let result = match tokio::task::spawn_blocking(move || {
1463                    hash_single_file(
1464                        &uuid_str,
1465                        &abs_path,
1466                        &expected_b64,
1467                        algorithm,
1468                        &bytes_counter,
1469                    )
1470                })
1471                .await
1472                {
1473                    Ok(r) => r,
1474                    Err(e) => Some(FileValidationError::Io {
1475                        uuid: err_uuid,
1476                        path: err_path,
1477                        message: format!("hash task failed: {}", e),
1478                    }),
1479                };
1480
1481                status_flag.store(
1482                    if result.is_some() { 3 } else { 2 }, // Failed or Done
1483                    std::sync::atomic::Ordering::Relaxed,
1484                );
1485                result
1486            }));
1487        }
1488
1489        // Collect results
1490        let mut errors = manifest_errors;
1491        for handle in handles {
1492            if let Ok(Some(err)) = handle.await {
1493                errors.push(err);
1494            }
1495        }
1496
1497        errors
1498    }
1499
1500    /// Validate PKL structural constraints per SMPTE ST 2067-2.
1501    ///
1502    /// Checks:
1503    /// - §9: No duplicate asset UUIDs within a single PKL
1504    /// - §7/9: Every PKL asset UUID exists in the AssetMap
1505    pub fn validate_pkl_constraints(&self) -> Vec<FileValidationError> {
1506        use crate::assetmap::PklNamespace;
1507        let mut errors = Vec::new();
1508
1509        // Build AssetMap UUID set + PackingList-flagged subset for the
1510        // ST 429-9 §6.3 cross-doc checks.
1511        let assetmap_ids: std::collections::HashSet<ImfUuid> = self
1512            .asset_map
1513            .asset_list
1514            .assets
1515            .iter()
1516            .map(|a| a.id)
1517            .collect();
1518        let assetmap_pkl_ids: std::collections::HashSet<ImfUuid> = self
1519            .asset_map
1520            .asset_list
1521            .assets
1522            .iter()
1523            .filter(|a| a.packing_list.unwrap_or(false))
1524            .map(|a| a.id)
1525            .collect();
1526
1527        // ST 429-9 §6.3: AssetMap must identify at least one PKL via
1528        // `<PackingList>true</PackingList>`. Fire whenever the AssetMap
1529        // has no such flag — this is true regardless of whether any
1530        // PKL files happen to be present on disk (the AssetMap is
1531        // authoritative on what the package is supposed to contain).
1532        if assetmap_pkl_ids.is_empty() {
1533            errors.push(FileValidationError::AssetMapHasNoPackingList);
1534        }
1535
1536        for pkl in self.packing_lists.values() {
1537            // ST 2067-2 §9: PKL namespace must be one of the published
1538            // SMPTE PKL namespace URIs. `PklNamespace::Unknown` is the
1539            // parser's sentinel for "not in the recognised set".
1540            if let PklNamespace::Unknown(uri) = &pkl.namespace {
1541                errors.push(FileValidationError::UnknownPklNamespace {
1542                    pkl_id: pkl.id.to_string(),
1543                    namespace: uri.clone(),
1544                });
1545            }
1546
1547            // ST 429-9 §6.3: every PKL document's Id must appear as a
1548            // PackingList-flagged asset in the AssetMap.
1549            if !assetmap_pkl_ids.contains(&pkl.id) {
1550                errors.push(FileValidationError::PklIdNotInAssetMap {
1551                    pkl_id: pkl.id.to_string(),
1552                });
1553            }
1554
1555            // ST 2067-2 §9: Check for duplicate asset IDs within this PKL
1556            let mut seen_ids: std::collections::HashSet<ImfUuid> = std::collections::HashSet::new();
1557            for asset in &pkl.asset_list.assets {
1558                if !seen_ids.insert(asset.id) {
1559                    errors.push(FileValidationError::DuplicatePklAssetId {
1560                        uuid: asset.id.to_string(),
1561                        pkl_id: pkl.id.to_string(),
1562                    });
1563                }
1564
1565                // ST 2067-2 §7: Every PKL asset must be in the AssetMap
1566                if !assetmap_ids.contains(&asset.id) {
1567                    errors.push(FileValidationError::NotInAssetMap {
1568                        uuid: asset.id.to_string(),
1569                        original_file_name: asset.original_file_name.clone(),
1570                    });
1571                }
1572            }
1573        }
1574
1575        errors
1576    }
1577
1578    /// Build a map from asset UUID to sanitized relative file path.
1579    ///
1580    /// Paths that would escape the package root (path traversal) are excluded.
1581    fn build_asset_path_map(&self) -> HashMap<ImfUuid, PathBuf> {
1582        let mut map = HashMap::new();
1583        let has_root = !self.root_path.as_os_str().is_empty();
1584        for asset in &self.asset_map.asset_list.assets {
1585            if let Some(chunk) = asset.chunk_list.chunks.first() {
1586                if has_root {
1587                    if let Some(safe_path) = sanitize_asset_path(&self.root_path, &chunk.path) {
1588                        map.insert(asset.id, safe_path);
1589                    }
1590                    // Traversal paths silently excluded — already reported at parse time
1591                } else {
1592                    map.insert(asset.id, PathBuf::from(&chunk.path));
1593                }
1594            }
1595        }
1596        map
1597    }
1598
1599    /// Comprehensive package-level validation producing a unified `ValidationReport`.
1600    ///
1601    /// Runs all structural and cross-reference checks that require package context
1602    /// (AssetMap, PKL, CPL relationships). This covers:
1603    ///
1604    /// - **ST 2067-2 §7/9:** PKL asset UUIDs exist in AssetMap
1605    /// - **ST 2067-2 §9:** No duplicate asset UUIDs within a PKL
1606    /// - **ST 2067-2 §7:** CPL TrackFileId references resolve in AssetMap
1607    /// - **ST 2067-2 §9:** File manifest (size) validation
1608    ///
1609    /// Callers should merge this with CPL-level validation results (e.g., from
1610    /// `crate::validation::ConstraintsValidator`) for a complete report.
1611    ///
1612    /// For hash verification (expensive I/O), use `validate_package_with_hashes()`.
1613    pub fn validate_package_structure(&self) -> ValidationReport {
1614        self.validate_package_structure_with_cpl_validator(|_| Vec::new(), false)
1615    }
1616
1617    /// Comprehensive package-level validation with optional CPL-level validator injection.
1618    ///
1619    /// This provides an extension seam for callers to plug in profile/spec CPL validators
1620    /// (e.g. registry-driven validators) without changing core package validation behavior.
1621    ///
1622    /// Set `skip_disk_checks` to `true` to skip file manifest (existence/size) and MXF header
1623    /// inspection. Useful for packages on slow or remote filesystems (e.g. S3 via MacFUSE).
1624    pub fn validate_package_structure_with_cpl_validator<F>(
1625        &self,
1626        cpl_validator: F,
1627        skip_disk_checks: bool,
1628    ) -> ValidationReport
1629    where
1630        F: Fn(&CompositionPlaylist) -> Vec<ValidationIssue>,
1631    {
1632        let mut report = ValidationReport::new(ValidationProfile::SMPTE);
1633
1634        // VOLINDEX diagnostics (ST 429-9) — emitted first
1635        for issue in &self.volindex_issues {
1636            report.add(issue.clone());
1637        }
1638
1639        // Parse-time diagnostics (PKL/CPL/OPL/SCM failures)
1640        for issue in &self.parse_issues {
1641            report.add(issue.clone());
1642        }
1643
1644        // PKL structural constraints (ST 2067-2 §7/9)
1645        for issue in self
1646            .validate_pkl_constraints()
1647            .iter()
1648            .map(ValidationIssue::from)
1649        {
1650            report.add(issue);
1651        }
1652
1653        // File manifest: every PKL asset exists on disk with correct size
1654        // (skipped on WASM — no real filesystem available, skipped when no root_path is set,
1655        //  and skipped when skip_disk_checks is true)
1656        #[cfg(not(target_arch = "wasm32"))]
1657        if !skip_disk_checks && !self.root_path.as_os_str().is_empty() {
1658            for issue in self
1659                .validate_file_manifest()
1660                .iter()
1661                .map(ValidationIssue::from)
1662            {
1663                report.add(issue);
1664            }
1665        }
1666
1667        // CPL TrackFileId → AssetMap cross-references
1668        for cpl in self.composition_playlists.values() {
1669            self.validate_cpl_asset_references_accumulating(cpl, &mut report);
1670
1671            // Optional external CPL-level validation injection
1672            for issue in cpl_validator(cpl) {
1673                report.add(issue);
1674            }
1675        }
1676
1677        // SCM reference checks (ST 2067-9:2018 §6)
1678        self.validate_scm_references(&mut report);
1679
1680        // Tool-level observations (not spec violations)
1681        self.emit_unreferenced_asset_info(&mut report);
1682
1683        // Multi-PKL consistency (ST 2067-2 §7)
1684        self.validate_multi_pkl_consistency(&mut report);
1685
1686        // MXF header cross-validation (ST 377-1) — skipped on WASM, when no root_path is set,
1687        // and when skip_disk_checks is true
1688        #[cfg(not(target_arch = "wasm32"))]
1689        if !skip_disk_checks && !self.root_path.as_os_str().is_empty() {
1690            self.validate_mxf_headers(&mut report);
1691            self.emit_unlisted_essence(&mut report);
1692        }
1693
1694        report
1695    }
1696
1697    /// Like `validate_package_structure()` but also verifies file hashes.
1698    ///
1699    /// **Warning:** This reads every asset file from disk to compute SHA-1/SHA-256
1700    /// digests. For large packages this can be slow.
1701    pub fn validate_package_with_hashes(&self) -> ValidationReport {
1702        self.validate_package_with_hashes_with_cpl_validator(|_| Vec::new())
1703    }
1704
1705    /// Hash-validating package-level validation with optional CPL-level validator injection.
1706    pub fn validate_package_with_hashes_with_cpl_validator<F>(
1707        &self,
1708        cpl_validator: F,
1709    ) -> ValidationReport
1710    where
1711        F: Fn(&CompositionPlaylist) -> Vec<ValidationIssue>,
1712    {
1713        let mut report = ValidationReport::new(ValidationProfile::SMPTE);
1714
1715        // VOLINDEX diagnostics (ST 429-9) — emitted first
1716        for issue in &self.volindex_issues {
1717            report.add(issue.clone());
1718        }
1719
1720        // Parse-time diagnostics (PKL/CPL/OPL/SCM failures)
1721        for issue in &self.parse_issues {
1722            report.add(issue.clone());
1723        }
1724
1725        // PKL structural constraints
1726        for issue in self
1727            .validate_pkl_constraints()
1728            .iter()
1729            .map(ValidationIssue::from)
1730        {
1731            report.add(issue);
1732        }
1733
1734        // File manifest + hash verification (subsumes validate_file_manifest)
1735        for issue in self
1736            .validate_file_hashes()
1737            .iter()
1738            .map(ValidationIssue::from)
1739        {
1740            report.add(issue);
1741        }
1742
1743        // CPL TrackFileId → AssetMap cross-references
1744        for cpl in self.composition_playlists.values() {
1745            self.validate_cpl_asset_references_accumulating(cpl, &mut report);
1746
1747            // Optional external CPL-level validation injection
1748            for issue in cpl_validator(cpl) {
1749                report.add(issue);
1750            }
1751        }
1752
1753        // Multi-PKL consistency
1754        self.validate_multi_pkl_consistency(&mut report);
1755
1756        // MXF header cross-validation (ST 377-1)
1757        self.validate_mxf_headers(&mut report);
1758
1759        report
1760    }
1761
1762    /// Validate Sidecar Composition Map references (ST 2067-9:2018).
1763    ///
1764    /// Enforces normative requirements from §5, §7.2.3, §7.2.4, §7.2.5, §7.3.1, §7.3.1.1.
1765    fn validate_scm_references(&self, report: &mut ValidationReport) {
1766        use std::collections::HashSet;
1767
1768        let asset_ids: HashSet<_> = self
1769            .asset_map
1770            .asset_list
1771            .assets
1772            .iter()
1773            .map(|a| a.id)
1774            .collect();
1775
1776        // §5: Collect all TrackFileIds referenced by any Virtual Track in any CPL.
1777        let virtual_track_file_ids: HashSet<ImfUuid> = self
1778            .composition_playlists
1779            .values()
1780            .flat_map(|cpl| cpl.segment_list.segments.iter())
1781            .flat_map(|seg| {
1782                seg.sequence_list
1783                    .all_sequences()
1784                    .into_iter()
1785                    .flat_map(|seq| {
1786                        seq.resource_list()
1787                            .resources
1788                            .iter()
1789                            .filter_map(|r| r.track_file_id)
1790                    })
1791                    .collect::<Vec<_>>()
1792            })
1793            .collect();
1794
1795        for scm in self.sidecar_composition_maps.values() {
1796            // §7.2.4: Signer present → Signature must be present.
1797            if scm.has_signer && !scm.has_signature {
1798                report.add(
1799                    ValidationIssue::new(
1800                        Severity::Error,
1801                        Category::Reference,
1802                        codes::St2067_9_2018::SignerWithoutSignature,
1803                        format!(
1804                            "SCM {}: Signer element present but Signature element is absent",
1805                            scm.id
1806                        ),
1807                    )
1808                    .with_context("scm_id", scm.id.to_string()),
1809                );
1810            }
1811
1812            // §7.2.5: Signature present → Signer must be present.
1813            if scm.has_signature && !scm.has_signer {
1814                report.add(
1815                    ValidationIssue::new(
1816                        Severity::Error,
1817                        Category::Reference,
1818                        codes::St2067_9_2018::SignatureWithoutSigner,
1819                        format!(
1820                            "SCM {}: Signature element present but Signer element is absent",
1821                            scm.id
1822                        ),
1823                    )
1824                    .with_context("scm_id", scm.id.to_string()),
1825                );
1826            }
1827
1828            let mut seen_asset_ids = HashSet::new();
1829            for sidecar_asset in &scm.sidecar_assets {
1830                // §7.2.3: Duplicate SidecarAsset Id within SidecarAssetList.
1831                if !seen_asset_ids.insert(sidecar_asset.id) {
1832                    report.add(
1833                        ValidationIssue::new(
1834                            Severity::Error,
1835                            Category::Reference,
1836                            codes::St2067_9_2018::DuplicateAssetId,
1837                            format!(
1838                                "Duplicate SidecarAsset Id {} in SCM {}",
1839                                sidecar_asset.id, scm.id
1840                            ),
1841                        )
1842                        .with_context("scm_id", scm.id.to_string())
1843                        .with_context("asset_id", sidecar_asset.id.to_string()),
1844                    );
1845                }
1846
1847                // §7.3.1: SidecarAsset Id must exist in the AssetMap.
1848                if !asset_ids.contains(&sidecar_asset.id) {
1849                    report.add(
1850                        ValidationIssue::new(
1851                            Severity::Error,
1852                            Category::Reference,
1853                            codes::St2067_9_2018::SidecarAssetNotFound,
1854                            format!(
1855                                "SCM {} references sidecar asset {} not found in AssetMap",
1856                                scm.id, sidecar_asset.id
1857                            ),
1858                        )
1859                        .with_context("scm_id", scm.id.to_string())
1860                        .with_context("asset_id", sidecar_asset.id.to_string()),
1861                    );
1862                }
1863
1864                // §5: Sidecar asset shall not be referenced by any Virtual Track.
1865                if virtual_track_file_ids.contains(&sidecar_asset.id) {
1866                    report.add(
1867                        ValidationIssue::new(
1868                            Severity::Error,
1869                            Category::Reference,
1870                            codes::St2067_9_2018::SidecarAssetReferencedByVirtualTrack,
1871                            format!(
1872                            "Sidecar asset {} (SCM {}) is referenced by a Virtual Track in a CPL",
1873                            sidecar_asset.id, scm.id
1874                        ),
1875                        )
1876                        .with_context("scm_id", scm.id.to_string())
1877                        .with_context("asset_id", sidecar_asset.id.to_string()),
1878                    );
1879                }
1880
1881                // §7.3.1.1: CPL Ids within AssociatedCPLList.
1882                let mut seen_cpl_ids = HashSet::new();
1883                for cpl_id in &sidecar_asset.cpl_ids {
1884                    // No duplicate CPLIds within one AssociatedCPLList.
1885                    if !seen_cpl_ids.insert(*cpl_id) {
1886                        report.add(ValidationIssue::new(
1887                            Severity::Error,
1888                            Category::Reference,
1889                            codes::St2067_9_2018::DuplicateCplId,
1890                            format!(
1891                                "Duplicate CPLId {} in AssociatedCPLList of sidecar asset {} (SCM {})",
1892                                cpl_id, sidecar_asset.id, scm.id
1893                            ),
1894                        ).with_context("scm_id", scm.id.to_string())
1895                         .with_context("asset_id", sidecar_asset.id.to_string())
1896                         .with_context("cpl_id", cpl_id.to_string()));
1897                    }
1898
1899                    // Each CPLId must reference a known CPL in the package.
1900                    if !self.composition_playlists.contains_key(cpl_id) {
1901                        report.add(ValidationIssue::new(
1902                            Severity::Error,
1903                            Category::Reference,
1904                            codes::St2067_9_2018::CplNotFound,
1905                            format!(
1906                                "SCM {} sidecar asset {} references CPL {} which is not known in this package",
1907                                scm.id, sidecar_asset.id, cpl_id
1908                            ),
1909                        ).with_context("scm_id", scm.id.to_string())
1910                         .with_context("asset_id", sidecar_asset.id.to_string())
1911                         .with_context("cpl_id", cpl_id.to_string()));
1912                    }
1913                }
1914            }
1915        }
1916    }
1917
1918    /// Validate consistency across multiple PKLs.
1919    ///
1920    /// Per ST 2067-2 §7, when the same asset UUID appears in multiple PKLs,
1921    /// the hash and size must be identical. Conflicting metadata indicates
1922    /// a corrupt or inconsistent package delivery.
1923    fn validate_multi_pkl_consistency(&self, report: &mut ValidationReport) {
1924        if self.packing_lists.len() < 2 {
1925            return; // Nothing to cross-validate
1926        }
1927
1928        // Build: asset UUID → Vec<(pkl_id, hash_b64, size)>
1929        let mut asset_records: HashMap<ImfUuid, Vec<(ImfUuid, String, u64)>> = HashMap::new();
1930        for (pkl_id, pkl) in &self.packing_lists {
1931            for asset in &pkl.asset_list.assets {
1932                asset_records.entry(asset.id).or_default().push((
1933                    *pkl_id,
1934                    asset.hash.to_base64(),
1935                    asset.size,
1936                ));
1937            }
1938        }
1939
1940        for (asset_id, records) in &asset_records {
1941            if records.len() < 2 {
1942                continue;
1943            }
1944            let (first_pkl, ref first_hash, first_size) = records[0];
1945            for (pkl_id, hash, size) in &records[1..] {
1946                if hash != first_hash {
1947                    report.add(
1948                        ValidationIssue::new(
1949                            Severity::Error,
1950                            Category::Asset,
1951                            codes::St2067_2_2020::ChecksumMismatch,
1952                            format!(
1953                                "Asset {} has different hashes in PKL {} ({}) vs PKL {} ({})",
1954                                asset_id,
1955                                &first_pkl.to_string()[..8],
1956                                &first_hash[..8.min(first_hash.len())],
1957                                &pkl_id.to_string()[..8],
1958                                &hash[..8.min(hash.len())],
1959                            ),
1960                        )
1961                        .with_context("asset_uuid", asset_id.to_string()),
1962                    );
1963                }
1964                if *size != first_size {
1965                    report.add(
1966                        ValidationIssue::new(
1967                            Severity::Error,
1968                            Category::Asset,
1969                            codes::St2067_2_2020::SizeMismatch,
1970                            format!(
1971                                "Asset {} has different sizes in PKL {} ({} bytes) vs PKL {} ({} bytes)",
1972                                asset_id,
1973                                &first_pkl.to_string()[..8],
1974                                first_size,
1975                                &pkl_id.to_string()[..8],
1976                                size,
1977                            ),
1978                        )
1979                        .with_context("asset_uuid", asset_id.to_string()),
1980                    );
1981                }
1982            }
1983        }
1984    }
1985
1986    /// ST 377-1 / ST 2067-2: Cross-validate MXF file headers against package metadata.
1987    ///
1988    /// For each MXF track file in the package:
1989    /// 1. Parse the MXF Header Partition Pack
1990    /// 2. Check that the Operational Pattern is OP1a (required for IMF per ST 2067-2)
1991    /// 3. Report parse failures as warnings (file may be unavailable or corrupt)
1992    fn validate_mxf_headers(&self, report: &mut ValidationReport) {
1993        // OP1a UL prefix: 060e2b34.04010102.0d010201.0101__00
1994        // Bytes 13-14 identify the OP variant: 01 01 = OP1a, 01 02 = OP1b, etc.
1995        // Byte 15 encodes the qualifier (xxxx xxxx pattern). We ignore byte 8 (version).
1996        const OP1A_BYTES_13_14: [u8; 2] = [0x01, 0x01];
1997
1998        // Collect MXF asset UUIDs from PKLs
1999        for pkl in self.packing_lists.values() {
2000            for asset in &pkl.asset_list.assets {
2001                if !asset.mime_type.is_mxf() {
2002                    continue;
2003                }
2004                let path = match self.asset_paths.get(&asset.id) {
2005                    Some(p) => p,
2006                    None => continue, // Missing file already reported by validate_file_manifest
2007                };
2008                if !path.exists() {
2009                    continue; // Missing file already reported by validate_file_manifest
2010                }
2011
2012                // MXF essence checks via the `regxmllib-rs` family
2013                // (smpte-mxf). Emits ST 2067-2 §5.2 / ST 377-1 §6.4
2014                // and §8.3.3 diagnostics that the hand-rolled parser
2015                // below doesn't cover. Native-only — the wasm build
2016                // doesn't link smpte-mxf, and browser callers don't
2017                // see MXF binaries anyway (they upload XML only).
2018                #[cfg(not(target_arch = "wasm32"))]
2019                for issue in crate::mxf::essence::validate_mxf_essence(path) {
2020                    let issue = issue.with_context("asset_uuid", asset.id.to_string());
2021                    report.add(issue);
2022                }
2023
2024                // ST 2067-2 §5.3 audio MCA + §5.4 timed-text checks.
2025                // Run the regxml pipeline on the MXF to get typed
2026                // header metadata, then apply the descriptor rules.
2027                // Failure to convert is surfaced as a Warning rather
2028                // than Error — partition-pack diagnostics above
2029                // already cover the structural concern, and we don't
2030                // want to double-report.
2031                #[cfg(not(target_arch = "wasm32"))]
2032                {
2033                    let opts = regxml::MxfFragmentOptions {
2034                        partition: regxml::PartitionTarget::Header,
2035                        ..Default::default()
2036                    };
2037                    match crate::mxf::metadata::parse_mxf_to_regxml(path, opts) {
2038                        Ok(regxml) => {
2039                            for issue in crate::mxf::audio_mca::check_audio_mca(&regxml, path) {
2040                                let issue = issue.with_context("asset_uuid", asset.id.to_string());
2041                                report.add(issue);
2042                            }
2043                            for issue in crate::mxf::timed_text::check_timed_text(&regxml, path) {
2044                                let issue = issue.with_context("asset_uuid", asset.id.to_string());
2045                                report.add(issue);
2046                            }
2047                        }
2048                        Err(e) => {
2049                            report.add(
2050                                crate::mxf::metadata::regxml_error_issue(path, &e)
2051                                    .with_context("asset_uuid", asset.id.to_string()),
2052                            );
2053                        }
2054                    }
2055                }
2056
2057                match crate::mxf::parse_mxf_header_info(path) {
2058                    Ok(info) => {
2059                        // Parse the operational pattern UL back to bytes to check OP variant.
2060                        // The UL format is: urn:smpte:ul:XXXXXXXX.XXXXXXXX.XXXXXXXX.XXXXXXXX
2061                        // We need bytes 13-14 (1-indexed) to identify the OP.
2062                        let op_bytes = parse_ul_bytes(&info.operational_pattern);
2063                        if let Some(bytes) = op_bytes {
2064                            // IMF requires OP1a: bytes 13-14 (0-indexed: 12-13) = 01 01
2065                            if bytes[12] != OP1A_BYTES_13_14[0] || bytes[13] != OP1A_BYTES_13_14[1]
2066                            {
2067                                report.add(
2068                                    ValidationIssue::new(
2069                                        Severity::Error,
2070                                        Category::Encoding,
2071                                        codes::St377_1_2011::Op1a,
2072                                        format!(
2073                                            "MXF track file '{}' has Operational Pattern '{}' \
2074                                             but IMF requires OP1a (ST 2067-2 §5.1)",
2075                                            path.file_name()
2076                                                .map(|n| n.to_string_lossy())
2077                                                .unwrap_or_default(),
2078                                            info.operational_pattern,
2079                                        ),
2080                                    )
2081                                    .with_location(Location::new().with_file(path.clone()))
2082                                    .with_context("asset_uuid", asset.id.to_string()),
2083                                );
2084                            }
2085                        }
2086
2087                        // ST 377-1: MXF track files should have at least one essence container
2088                        if info.essence_containers.is_empty() {
2089                            report.add(
2090                                ValidationIssue::new(
2091                                    Severity::Warning,
2092                                    Category::Encoding,
2093                                    codes::St377_1_2011::NoEssenceContainers,
2094                                    format!(
2095                                        "MXF track file '{}' has no essence containers in its header partition",
2096                                        path.file_name().map(|n| n.to_string_lossy()).unwrap_or_default(),
2097                                    ),
2098                                )
2099                                .with_location(Location::new().with_file(path.clone()))
2100                                .with_context("asset_uuid", asset.id.to_string()),
2101                            );
2102                        }
2103                    }
2104                    Err(crate::mxf::MxfParseError::NotMxf) => {
2105                        report.add(
2106                            ValidationIssue::new(
2107                                Severity::Warning,
2108                                Category::Asset,
2109                                codes::St377_1_2011::NotMxf,
2110                                format!(
2111                                    "File '{}' has MXF MIME type but is not a valid MXF file",
2112                                    path.file_name()
2113                                        .map(|n| n.to_string_lossy())
2114                                        .unwrap_or_default(),
2115                                ),
2116                            )
2117                            .with_location(Location::new().with_file(path.clone()))
2118                            .with_context("asset_uuid", asset.id.to_string()),
2119                        );
2120                    }
2121                    Err(e) => {
2122                        report.add(
2123                            ValidationIssue::new(
2124                                Severity::Warning,
2125                                Category::Asset,
2126                                codes::St377_1_2011::ParseError,
2127                                format!(
2128                                    "Could not parse MXF header of '{}': {}",
2129                                    path.file_name()
2130                                        .map(|n| n.to_string_lossy())
2131                                        .unwrap_or_default(),
2132                                    e,
2133                                ),
2134                            )
2135                            .with_location(Location::new().with_file(path.clone()))
2136                            .with_context("asset_uuid", asset.id.to_string()),
2137                        );
2138                    }
2139                }
2140            }
2141        }
2142    }
2143
2144    /// ST 2067-3 §7.2.2: Within each segment, all virtual tracks must span the
2145    /// same timeline duration. Durations are compared in time (seconds), not in
2146    /// raw edit-rate units, because video (e.g. 24fps) and audio (e.g. 48000Hz)
2147    /// use different edit rates.
2148    ///
2149    /// A resource's effective duration in edit-rate units =
2150    /// `source_duration.unwrap_or(intrinsic_duration - entry_point.unwrap_or(0))`.
2151    /// Time = effective_duration / edit_rate.
2152    #[allow(dead_code)]
2153    fn validate_segment_durations(&self, report: &mut ValidationReport) {
2154        for cpl in self.composition_playlists.values() {
2155            let cpl_id = cpl.id;
2156            let cpl_er = cpl.edit_rate.as_ref();
2157
2158            for (seg_idx, segment) in cpl.segment_list.segments.iter().enumerate() {
2159                let mut durations: Vec<(String, f64)> = Vec::new();
2160
2161                for seq in segment.sequence_list.all_sequences() {
2162                    let resources = &seq.resource_list().resources;
2163                    let mut total_num: u64 = 0;
2164                    let mut rate_den: u64 = 1;
2165                    for r in resources {
2166                        let ep = r.entry_point.unwrap_or(0);
2167                        let dur = r
2168                            .source_duration
2169                            .unwrap_or(r.intrinsic_duration.saturating_sub(ep));
2170                        let er = r
2171                            .edit_rate
2172                            .as_ref()
2173                            .or(cpl_er)
2174                            .cloned()
2175                            .unwrap_or(EditRate::new(1, 1));
2176                        total_num =
2177                            total_num.saturating_add(dur.saturating_mul(er.denominator as u64));
2178                        rate_den = er.numerator as u64;
2179                    }
2180                    if rate_den > 0 {
2181                        durations.push((
2182                            seq.track_id().to_string(),
2183                            total_num as f64 / rate_den as f64,
2184                        ));
2185                    }
2186                }
2187
2188                if durations.is_empty() {
2189                    continue;
2190                }
2191
2192                let first_dur = durations[0].1;
2193                // Allow 1μs tolerance for floating-point rounding
2194                const TOLERANCE: f64 = 0.000001;
2195                for (track_id, dur) in &durations[1..] {
2196                    if (*dur - first_dur).abs() > TOLERANCE {
2197                        report.add(
2198                            ValidationIssue::new(
2199                                Severity::Error,
2200                                Category::Timing,
2201                                codes::St2067_3_2016::SegmentDuration,
2202                                format!(
2203                                    "Segment {} has mismatched virtual track durations: \
2204                                     track {} = {:.6}s but track {} = {:.6}s",
2205                                    seg_idx, durations[0].0, first_dur, track_id, dur,
2206                                ),
2207                            )
2208                            .with_location(Location::new().with_cpl(cpl_id).with_segment(seg_idx)),
2209                        );
2210                        break; // One error per segment is sufficient
2211                    }
2212                }
2213            }
2214        }
2215    }
2216
2217    /// Accumulating version of CPL asset reference validation.
2218    ///
2219    /// Per SMPTE ST 2067-2 §7, every TrackFileId in a CPL Resource must correspond
2220    /// to an asset UUID in the AssetMap. Reports each missing reference as a separate
2221    /// `ValidationIssue` rather than failing on the first one.
2222    fn validate_cpl_asset_references_accumulating(
2223        &self,
2224        cpl: &crate::cpl::CompositionPlaylist,
2225        report: &mut ValidationReport,
2226    ) {
2227        if self.asset_map.asset_list.assets.is_empty() {
2228            report.add(
2229                ValidationIssue::new(
2230                    Severity::Critical,
2231                    Category::Structure,
2232                    codes::St2067_2_2020::AssetMap,
2233                    "AssetMap contains no assets",
2234                )
2235                .with_location(Location::new().with_cpl(cpl.id)),
2236            );
2237            return;
2238        }
2239
2240        let assetmap_ids: std::collections::HashSet<ImfUuid> = self
2241            .asset_map
2242            .asset_list
2243            .assets
2244            .iter()
2245            .map(|a| a.id)
2246            .collect();
2247
2248        let cpl_id = cpl.id;
2249
2250        for (seg_idx, segment) in cpl.segment_list.segments.iter().enumerate() {
2251            for (seq, track_type) in segment.sequence_list.all_sequences_typed() {
2252                for (res_idx, resource) in seq.resource_list().resources.iter().enumerate() {
2253                    if let Some(ref track_file_id) = resource.track_file_id {
2254                        if !assetmap_ids.contains(track_file_id) {
2255                            report.add(
2256                                ValidationIssue::new(
2257                                    Severity::Error,
2258                                    Category::Reference,
2259                                    codes::St2067_2_2020::UnresolvedUuid,
2260                                    format!(
2261                                        "{} TrackFileId {} not found in AssetMap",
2262                                        track_type, track_file_id
2263                                    ),
2264                                )
2265                                .with_location(
2266                                    Location::new()
2267                                        .with_cpl(cpl_id)
2268                                        .with_segment(seg_idx)
2269                                        .with_resource(res_idx),
2270                                )
2271                                .with_context("track_file_id", track_file_id.to_string()),
2272                            );
2273                        }
2274                    }
2275                }
2276            }
2277        }
2278    }
2279}
2280
/// Parse a `urn:smpte:ul:XXXXXXXX.XXXXXXXX.XXXXXXXX.XXXXXXXX` string into 16 raw bytes.
///
/// The `urn:smpte:ul:` prefix is mandatory. After the prefix every character
/// that is not an ASCII hex digit (the dots, but also anything else) is
/// discarded; the remaining digits must number exactly 32. Upper- and
/// lower-case digits are both accepted.
///
/// Returns `None` when the prefix is missing or the digit count is not 32.
fn parse_ul_bytes(ul: &str) -> Option<[u8; 16]> {
    let body = ul.strip_prefix("urn:smpte:ul:")?;

    // `to_digit(16)` yields `Some` only for ASCII hex digits, so this both
    // filters the separators out and converts each digit to its 4-bit value.
    let nibbles: Vec<u8> = body
        .chars()
        .filter_map(|c| c.to_digit(16))
        .map(|value| value as u8)
        .collect();

    if nibbles.len() != 32 {
        return None;
    }

    let mut out = [0u8; 16];
    for (slot, pair) in out.iter_mut().zip(nibbles.chunks_exact(2)) {
        *slot = (pair[0] << 4) | pair[1];
    }
    Some(out)
}
2294
// Flat, string-based summary of one Composition Playlist, as produced by
// `cpl_details_from`. Serializable with serde; a JSON schema is derived when
// the `jsonschema` feature is enabled.
//
// NOTE(review): plain `//` comments are used on purpose — `///` doc comments
// would presumably surface as descriptions in the derived schema; switch to
// `///` if that is wanted.
#[cfg_attr(feature = "jsonschema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CplDetails {
    // CPL id rendered as a string.
    pub id: String,
    // Text of the CPL content title.
    pub title: String,
    // Content kind rendered as a string.
    pub kind: String,
    // Issue date exactly as stored on the CPL.
    pub issue_date: String,
    // Text of the annotation, when the CPL has one.
    pub annotation: Option<String>,
    // Text of the issuer, when the CPL has one.
    pub issuer: Option<String>,
    // Text of the creator, when the CPL has one.
    pub creator: Option<String>,
    // Text of the content originator, when the CPL has one.
    pub content_originator: Option<String>,
    // Ids of the entries of the content version list; empty when the CPL has
    // no such list.
    pub content_versions: Vec<String>,
    // One entry per segment, in segment-list order.
    pub segments: Vec<SegmentInfo>,
}
2309
// Summary of one CPL segment inside `CplDetails`.
#[cfg_attr(feature = "jsonschema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SegmentInfo {
    // Segment id rendered as a string.
    pub id: String,
    // Number of main image + main audio + subtitles sequences in the segment
    // (as counted by `cpl_details_from`; other sequence kinds are not included).
    pub sequence_count: usize,
}
2316
// Per-CPL track statistics produced by `Imferno::analyze_tracks` and
// `Imferno::analyze_tracks_enhanced`.
//
// The counts are sums of sequence-list lengths over all segments of the CPL,
// so a sequence present in several segments is counted once per segment.
#[cfg_attr(feature = "jsonschema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TrackAnalysis {
    // Key of the CPL in the package's playlist map, rendered as a string.
    pub cpl_id: String,
    // Text of the CPL content title.
    pub cpl_title: String,
    // Sum of `audio_tracks`, `video_tracks` and `subtitle_tracks`.
    pub total_tracks: usize,
    // Main audio sequences, summed over segments.
    pub audio_tracks: usize,
    // Main image sequences, summed over segments.
    pub video_tracks: usize,
    // Subtitles sequences, summed over segments.
    pub subtitle_tracks: usize,
    // Languages taken from caller-supplied feature data (`audio_languages`);
    // empty when no feature data is given.
    pub languages: Vec<String>,
    // Category labels ("Video", "Audio", "Subtitle") for the kinds present,
    // plus any codec names taken from caller-supplied feature data.
    pub codecs: Vec<String>,
}
2329
2330/// Project a parsed CPL into the flat `CplDetails` view used by the CLI and other consumers.
2331pub fn cpl_details_from(cpl: &crate::cpl::CompositionPlaylist) -> CplDetails {
2332    let content_versions = if let Some(ref version_list) = cpl.content_version_list {
2333        version_list
2334            .content_versions
2335            .iter()
2336            .map(|v| v.id.clone())
2337            .collect()
2338    } else {
2339        Vec::new()
2340    };
2341
2342    let segments = cpl
2343        .segment_list
2344        .segments
2345        .iter()
2346        .map(|seg| {
2347            let seq_list = &seg.sequence_list;
2348            let sequence_count = seq_list.main_image_sequences.len()
2349                + seq_list.main_audio_sequences.len()
2350                + seq_list.subtitles_sequences.len();
2351            SegmentInfo {
2352                id: seg.id.to_string(),
2353                sequence_count,
2354            }
2355        })
2356        .collect();
2357
2358    CplDetails {
2359        id: cpl.id.to_string(),
2360        title: cpl.content_title.text.clone(),
2361        kind: cpl.content_kind.to_string(),
2362        issue_date: cpl.issue_date.clone(),
2363        annotation: cpl.annotation.as_ref().map(|ls| ls.text.clone()),
2364        issuer: cpl.issuer.as_ref().map(|ls| ls.text.clone()),
2365        creator: cpl.creator.as_ref().map(|ls| ls.text.clone()),
2366        content_originator: cpl.content_originator.as_ref().map(|ls| ls.text.clone()),
2367        content_versions,
2368        segments,
2369    }
2370}
2371
2372impl Imferno {
2373    /// Get detailed information about a specific CPL
2374    pub fn get_cpl_details(&self, uuid: &str) -> Option<CplDetails> {
2375        self.get_cpl_str(uuid).map(cpl_details_from)
2376    }
2377
2378    /// Get track analysis for all CPLs
2379    pub fn analyze_tracks(&self) -> Vec<TrackAnalysis> {
2380        let mut analyses = Vec::new();
2381
2382        for (uuid, cpl) in &self.composition_playlists {
2383            let mut total_tracks = 0;
2384            let mut audio_tracks = 0;
2385            let mut video_tracks = 0;
2386            let mut subtitle_tracks = 0;
2387            let mut codecs = std::collections::HashSet::new();
2388
2389            for segment in &cpl.segment_list.segments {
2390                let seq_list = &segment.sequence_list;
2391
2392                if !seq_list.main_image_sequences.is_empty() {
2393                    video_tracks += seq_list.main_image_sequences.len();
2394                    total_tracks += seq_list.main_image_sequences.len();
2395                    codecs.insert("Video".to_string());
2396                }
2397
2398                if !seq_list.main_audio_sequences.is_empty() {
2399                    audio_tracks += seq_list.main_audio_sequences.len();
2400                    total_tracks += seq_list.main_audio_sequences.len();
2401                    codecs.insert("Audio".to_string());
2402                }
2403
2404                if !seq_list.subtitles_sequences.is_empty() {
2405                    subtitle_tracks += seq_list.subtitles_sequences.len();
2406                    total_tracks += seq_list.subtitles_sequences.len();
2407                    codecs.insert("Subtitle".to_string());
2408                }
2409            }
2410
2411            analyses.push(TrackAnalysis {
2412                cpl_id: uuid.to_string(),
2413                cpl_title: cpl.content_title.text.clone(),
2414                total_tracks,
2415                audio_tracks,
2416                video_tracks,
2417                subtitle_tracks,
2418                languages: Vec::new(),
2419                codecs: codecs.into_iter().collect(),
2420            });
2421        }
2422
2423        analyses
2424    }
2425
2426    /// Get enhanced track analysis using provided feature data
2427    pub fn analyze_tracks_enhanced(
2428        &self,
2429        feature_data: Option<serde_json::Value>,
2430    ) -> Vec<TrackAnalysis> {
2431        let mut analyses = Vec::new();
2432
2433        for (uuid, cpl) in &self.composition_playlists {
2434            let mut total_tracks = 0;
2435            let mut audio_tracks = 0;
2436            let mut video_tracks = 0;
2437            let mut subtitle_tracks = 0;
2438            let mut codecs = std::collections::HashSet::new();
2439
2440            for segment in &cpl.segment_list.segments {
2441                let seq_list = &segment.sequence_list;
2442
2443                if !seq_list.main_image_sequences.is_empty() {
2444                    video_tracks += seq_list.main_image_sequences.len();
2445                    total_tracks += seq_list.main_image_sequences.len();
2446                }
2447
2448                if !seq_list.main_audio_sequences.is_empty() {
2449                    audio_tracks += seq_list.main_audio_sequences.len();
2450                    total_tracks += seq_list.main_audio_sequences.len();
2451                }
2452
2453                if !seq_list.subtitles_sequences.is_empty() {
2454                    subtitle_tracks += seq_list.subtitles_sequences.len();
2455                    total_tracks += seq_list.subtitles_sequences.len();
2456                }
2457            }
2458
2459            let languages = if let Some(ref data) = feature_data {
2460                if let Some(audio_langs) = data["audio_languages"].as_array() {
2461                    audio_langs
2462                        .iter()
2463                        .filter_map(|v| v.as_str().map(String::from))
2464                        .collect()
2465                } else {
2466                    Vec::new()
2467                }
2468            } else {
2469                Vec::new()
2470            };
2471
2472            if let Some(ref data) = feature_data {
2473                if let Some(video_codecs) = data["video_codecs"].as_array() {
2474                    for codec in video_codecs {
2475                        if let Some(codec_str) = codec.as_str() {
2476                            codecs.insert(codec_str.to_string());
2477                        }
2478                    }
2479                }
2480                if let Some(audio_codecs) = data["audio_codecs"].as_array() {
2481                    for codec in audio_codecs {
2482                        if let Some(codec_str) = codec.as_str() {
2483                            codecs.insert(codec_str.to_string());
2484                        }
2485                    }
2486                }
2487            }
2488
2489            if video_tracks > 0 {
2490                codecs.insert("Video".to_string());
2491            }
2492            if audio_tracks > 0 {
2493                codecs.insert("Audio".to_string());
2494            }
2495            if subtitle_tracks > 0 {
2496                codecs.insert("Subtitle".to_string());
2497            }
2498
2499            analyses.push(TrackAnalysis {
2500                cpl_id: uuid.to_string(),
2501                cpl_title: cpl.content_title.text.clone(),
2502                total_tracks,
2503                audio_tracks,
2504                video_tracks,
2505                subtitle_tracks,
2506                languages,
2507                codecs: codecs.into_iter().collect(),
2508            });
2509        }
2510
2511        analyses
2512    }
2513}
2514
2515// ── Pipeline options ──────────────────────────────────────────────────────────
2516
2517pub use crate::diagnostics::{RuleSeverity, RulesConfig};
2518
/// Options controlling validation behaviour.
///
/// `Default` is derived, so `ValidationOptions::default()` means: no rule
/// overrides, auto-detected core and application specs, no aggregation of
/// repeated issues and — on native targets — no hash verification with disk
/// checks left enabled. Override individual fields with struct-update syntax:
/// `ValidationOptions { aggregate_repeats: true, ..Default::default() }`.
#[derive(Debug, Default, Clone)]
pub struct ValidationOptions {
    /// ESLint-style per-rule severity overrides applied to the output.
    /// An empty map (the default) is a no-op.
    pub rules: RulesConfig,
    /// Core constraints spec version. `None` = auto-detect from CPL namespace.
    pub core_spec: Option<crate::validation::CoreSpecTarget>,
    /// Application profile spec versions. `None` = auto-detect from CPL.
    pub app_specs: Option<Vec<crate::validation::AppSpecTarget>>,
    /// When `true`, collapse repeat-offender issues (same code in the
    /// same severity bucket) into one entry carrying the rest of their
    /// `Location`s in `additional_instances`. Operator-facing wins:
    /// reports stay readable on packages with thousands of similar
    /// findings. Default `false` — preserves the legacy
    /// one-issue-per-occurrence shape.
    pub aggregate_repeats: bool,
    /// Path used for hash verification (only meaningful on native targets).
    /// When `Some`, hash verification is enabled; when `None` (the default), skipped.
    /// The field does not exist on `wasm32` builds.
    #[cfg(not(target_arch = "wasm32"))]
    pub verify_hashes: Option<PathBuf>,
    /// Skip all disk I/O checks: file manifest (existence/size) and MXF header inspection.
    /// Useful for packages on slow or remote filesystems (e.g. S3 via MacFUSE) where
    /// XML-only structural validation is sufficient.
    /// Default `false`. The field does not exist on `wasm32` builds.
    #[cfg(not(target_arch = "wasm32"))]
    pub skip_disk_checks: bool,
}
2546
/// Per-file hash verification status.
///
/// This is the decoded form of the raw `u8` status code stored per file;
/// `HashProgressTracker::snapshot` performs the decoding.
#[cfg(feature = "tokio")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashFileStatus {
    /// Decoded from any raw code other than 1, 2 or 3 — including the initial
    /// code 0 a file gets when it is registered.
    Waiting,
    /// Decoded from raw code 1.
    Hashing,
    /// Decoded from raw code 2.
    Done,
    /// Decoded from raw code 3.
    Failed,
}
2556
/// Per-file progress info for the hash verification display.
///
/// The two atomics are shared: `HashProgressTracker::register` hands clones of
/// the same `Arc`s back to its caller, which updates them while the tracker
/// reads them.
#[cfg(feature = "tokio")]
pub struct HashFileInfo {
    /// Display name supplied at registration.
    pub name: String,
    /// Total size supplied at registration; summed by
    /// `HashProgressTracker::total_bytes` (presumably bytes — confirm with callers).
    pub size: u64,
    /// Running progress counter, starting at 0.
    pub bytes_done: std::sync::Arc<std::sync::atomic::AtomicU64>,
    /// Raw status code, starting at 0; see `HashFileStatus` for the decoded form.
    pub status: std::sync::Arc<std::sync::atomic::AtomicU8>,
}
2565
/// Thread-safe progress tracker for parallel hash verification.
///
/// The file list is guarded by a mutex; the per-file counters inside it are
/// atomics, so they can be updated without taking that mutex.
#[cfg(feature = "tokio")]
pub struct HashProgressTracker {
    /// Registered files, in registration order.
    pub files: std::sync::Mutex<Vec<HashFileInfo>>,
}
2571
#[cfg(feature = "tokio")]
impl HashProgressTracker {
    /// Creates a tracker with no registered files.
    pub fn new() -> Self {
        Self {
            files: std::sync::Mutex::new(Vec::new()),
        }
    }

    /// Locks the file list, recovering the guard if the mutex is poisoned.
    ///
    /// The list only ever grows by whole entries, so it is still consistent
    /// after a panic elsewhere; recovering keeps progress reporting alive
    /// instead of cascading the panic into every reader.
    fn lock_files(&self) -> std::sync::MutexGuard<'_, Vec<HashFileInfo>> {
        self.files
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Registers a file and returns its shared progress handles.
    ///
    /// The returned pair is `(bytes_done, status)`: the same atomics that are
    /// stored in the tracker, both starting at 0. The caller updates them as
    /// work proceeds; the tracker's read methods observe the updates.
    pub fn register(
        &self,
        name: String,
        size: u64,
    ) -> (
        std::sync::Arc<std::sync::atomic::AtomicU64>,
        std::sync::Arc<std::sync::atomic::AtomicU8>,
    ) {
        let bytes_done = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0));
        let status = std::sync::Arc::new(std::sync::atomic::AtomicU8::new(0));
        let handles = (
            std::sync::Arc::clone(&bytes_done),
            std::sync::Arc::clone(&status),
        );
        self.lock_files().push(HashFileInfo {
            name,
            size,
            bytes_done,
            status,
        });
        handles
    }

    /// Snapshot of all file progress for display.
    ///
    /// Each tuple is `(name, bytes_done, size, status)`, in registration order.
    /// The file list is locked for the duration of the call; the per-file
    /// counters are read with relaxed atomic loads, so values from different
    /// files are not a single consistent point in time.
    pub fn snapshot(&self) -> Vec<(String, u64, u64, HashFileStatus)> {
        use std::sync::atomic::Ordering::Relaxed;
        self.lock_files()
            .iter()
            .map(|f| {
                // Raw status codes; anything unknown is shown as waiting.
                let status = match f.status.load(Relaxed) {
                    1 => HashFileStatus::Hashing,
                    2 => HashFileStatus::Done,
                    3 => HashFileStatus::Failed,
                    _ => HashFileStatus::Waiting,
                };
                (f.name.clone(), f.bytes_done.load(Relaxed), f.size, status)
            })
            .collect()
    }

    /// Total bytes done across all files.
    pub fn total_bytes_done(&self) -> u64 {
        use std::sync::atomic::Ordering::Relaxed;
        self.lock_files()
            .iter()
            .map(|f| f.bytes_done.load(Relaxed))
            .sum()
    }

    /// Total bytes across all files.
    pub fn total_bytes(&self) -> u64 {
        self.lock_files().iter().map(|f| f.size).sum()
    }
}
2632
/// `Default` is the empty tracker, identical to [`HashProgressTracker::new`].
#[cfg(feature = "tokio")]
impl Default for HashProgressTracker {
    fn default() -> Self {
        HashProgressTracker::new()
    }
}
2639
/// Hash a single file and compare against expected digest. Returns error on mismatch.
///
/// * `uuid` — asset id, copied into any returned error.
/// * `path` — file to read.
/// * `expected_b64` — expected digest, standard-alphabet Base64; compared as
///   an exact string against the computed digest.
/// * `algorithm` — digest algorithm to use.
/// * `bytes_done` — incremented by the size of every chunk after it has been
///   hashed, so another thread can display progress.
///
/// Returns `None` when the digest matches, `Some(FileValidationError::Io)` when
/// the file cannot be opened or read, and
/// `Some(FileValidationError::HashMismatch)` when the digests differ.
#[cfg(all(not(target_arch = "wasm32"), feature = "tokio"))]
fn hash_single_file(
    uuid: &str,
    path: &std::path::Path,
    expected_b64: &str,
    algorithm: crate::assetmap::HashAlgorithm,
    bytes_done: &std::sync::atomic::AtomicU64,
) -> Option<FileValidationError> {
    /// Streams `source` to `sink` in 1 MiB chunks, bumping `bytes_done` per chunk.
    fn feed_chunks(
        source: &mut impl std::io::Read,
        bytes_done: &std::sync::atomic::AtomicU64,
        mut sink: impl FnMut(&[u8]),
    ) -> std::io::Result<()> {
        const CHUNK_SIZE: usize = 1024 * 1024;
        let mut buf = vec![0u8; CHUNK_SIZE];
        loop {
            match source.read(&mut buf) {
                Ok(0) => return Ok(()),
                Ok(n) => {
                    sink(&buf[..n]);
                    bytes_done.fetch_add(n as u64, std::sync::atomic::Ordering::Relaxed);
                }
                // An interrupted read is non-fatal and must simply be retried.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    let io_error = |e: std::io::Error| FileValidationError::Io {
        uuid: uuid.to_string(),
        path: path.to_path_buf(),
        message: e.to_string(),
    };

    // Read straight from the file: the chunk buffer is already large, so an
    // extra buffering layer of the same size would only be bypassed.
    let mut file = match std::fs::File::open(path) {
        Ok(f) => f,
        Err(e) => return Some(io_error(e)),
    };

    let actual_b64 = match algorithm {
        crate::assetmap::HashAlgorithm::Sha1 => {
            use sha1::Digest;
            let mut hasher = sha1::Sha1::new();
            if let Err(e) = feed_chunks(&mut file, bytes_done, |chunk| hasher.update(chunk)) {
                return Some(io_error(e));
            }
            base64::Engine::encode(
                &base64::engine::general_purpose::STANDARD,
                hasher.finalize(),
            )
        }
        crate::assetmap::HashAlgorithm::Sha256 => {
            use sha2::Digest;
            let mut hasher = sha2::Sha256::new();
            if let Err(e) = feed_chunks(&mut file, bytes_done, |chunk| hasher.update(chunk)) {
                return Some(io_error(e));
            }
            base64::Engine::encode(
                &base64::engine::general_purpose::STANDARD,
                hasher.finalize(),
            )
        }
    };

    if actual_b64 != expected_b64 {
        Some(FileValidationError::HashMismatch {
            uuid: uuid.to_string(),
            path: path.to_path_buf(),
            expected: expected_b64.to_string(),
            actual: actual_b64,
        })
    } else {
        None
    }
}
2728
2729#[cfg(test)]
2730mod tests {
2731    use super::*;
2732    use codes::{St2067_2_2020, St377_1_2011, ValidationCode};
2733
2734    fn test_data(name: &str) -> PathBuf {
2735        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
2736            .join("../../test-data")
2737            .join(name)
2738    }
2739
2740    /// `ValidationOptions::default()` plus the MERIDIAN-specific rule
2741    /// suppressions needed so the reference fixture validates clean.
2742    ///
2743    /// The Photon-vintage MERIDIAN sample carries a real ST 377-4 §6.3.2
2744    /// audio defect (`SoundfieldGroupLinkID` on the
2745    /// `AudioChannelLabelSubDescriptor` doesn't match the
2746    /// `SoundfieldGroupLabelSubDescriptor`'s `MCALinkID`). Our photon-
2747    /// parity sweep now catches this — correctly — at Error severity, so
2748    /// tests asserting "MERIDIAN validates without errors" need to opt
2749    /// out of that single rule explicitly. The CLI tests do the same via
2750    /// `--rule SoundfieldGroupLinkIDMismatch=off`.
2751    fn meridian_test_options() -> ValidationOptions {
2752        let mut rules = crate::diagnostics::rules::RulesConfig::default();
2753        rules.set_raw(
2754            "SoundfieldGroupLinkIDMismatch".to_string(),
2755            crate::diagnostics::rules::RuleSeverity::Off,
2756        );
2757        ValidationOptions {
2758            rules,
2759            ..ValidationOptions::default()
2760        }
2761    }
2762
2763    #[test]
2764    fn test_parse_netflix_photon_package() {
2765        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2766
2767        match Imferno::parse(read_dir(test_path).unwrap()) {
2768            Ok(package) => {
2769                assert_eq!(package.volume_index.index, 1);
2770                assert!(!package.asset_map.asset_list.assets.is_empty());
2771                assert!(!package.composition_playlists.is_empty());
2772
2773                let main_cpl = package.get_main_cpl().unwrap();
2774                assert_eq!(main_cpl.content_kind, crate::cpl::ContentKind::Test);
2775                assert_eq!(main_cpl.content_title.text, "MERIDIAN");
2776
2777                // `validate_structure()` is the legacy Result-shaped entry
2778                // and can't accept rule overrides, so go through
2779                // `validate(&meridian_test_options())` to suppress the
2780                // known MERIDIAN ST 377-4 §6.3.2 audio defect (see helper).
2781                let report = package.validate(&meridian_test_options());
2782                assert!(
2783                    !report.has_errors(),
2784                    "MERIDIAN should validate cleanly under meridian_test_options: {:?}",
2785                    report.summary()
2786                );
2787            }
2788            Err(e) => panic!("Failed to parse IMF package: {:?}", e),
2789        }
2790    }
2791
2792    #[test]
2793    fn test_get_cpl_details_api() {
2794        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2795        let package =
2796            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2797
2798        let cpl_uuid = "0eb3d1b9-b77b-4d3f-bbe5-7c69b15dca85";
2799        let details = package
2800            .get_cpl_details(cpl_uuid)
2801            .expect("Failed to get CPL details");
2802
2803        assert_eq!(details.id, cpl_uuid);
2804        assert_eq!(details.title, "MERIDIAN");
2805        assert_eq!(details.kind, "Test");
2806        assert!(details.annotation.is_some());
2807        assert_eq!(details.segments.len(), 1);
2808
2809        let segment = &details.segments[0];
2810        assert!(!segment.id.is_empty());
2811
2812        // Test with non-existent UUID
2813        assert!(package.get_cpl_details("invalid-uuid").is_none());
2814    }
2815
2816    #[test]
2817    fn test_analyze_tracks_api() {
2818        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2819        let package =
2820            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2821
2822        let track_analyses = package.analyze_tracks();
2823
2824        assert_eq!(track_analyses.len(), 1);
2825        let analysis = &track_analyses[0];
2826
2827        assert_eq!(analysis.cpl_title, "MERIDIAN");
2828    }
2829
2830    #[test]
2831    fn test_list_cpl_uuids_api() {
2832        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2833        let package =
2834            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2835
2836        let uuids = package.list_cpl_uuids();
2837
2838        assert_eq!(uuids.len(), 1);
2839        assert_eq!(uuids[0].to_string(), "0eb3d1b9-b77b-4d3f-bbe5-7c69b15dca85");
2840    }
2841
2842    #[test]
2843    fn test_validation_api() {
2844        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2845        let package =
2846            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2847
2848        let report = package.validate(&meridian_test_options());
2849        assert!(
2850            !report.has_errors(),
2851            "Package structure validation should have no errors: {:?}",
2852            report.summary()
2853        );
2854    }
2855
2856    #[test]
2857    fn test_validate_package_structure_with_cpl_validator_injects_issues() {
2858        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2859        let package =
2860            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2861
2862        const INJECTED_CODE: &str = "ST2067-2:2020:6.12/InjectedRuleForTest";
2863
2864        let report = package.validate_package_structure_with_cpl_validator(
2865            |cpl| {
2866                vec![ValidationIssue::new(
2867                    Severity::Warning,
2868                    Category::Metadata,
2869                    INJECTED_CODE,
2870                    format!("Injected validator issue for CPL {}", cpl.id),
2871                )]
2872            },
2873            false,
2874        );
2875
2876        let expected_code = INJECTED_CODE;
2877        let injected_present = report
2878            .warnings
2879            .iter()
2880            .any(|issue| issue.code == expected_code)
2881            || report
2882                .errors
2883                .iter()
2884                .any(|issue| issue.code == expected_code)
2885            || report
2886                .critical
2887                .iter()
2888                .any(|issue| issue.code == expected_code)
2889            || report.info.iter().any(|issue| issue.code == expected_code);
2890        assert!(
2891            injected_present,
2892            "Expected injected CPL issue to be present in report"
2893        );
2894    }
2895
2896    #[test]
2897    fn test_validate_package_structure_with_empty_cpl_validator_matches_default_counts() {
2898        use crate::validation::{
2899            validate_cpl_with_registry, ConfigurableValidatorRegistry, ValidatorSelection,
2900        };
2901
2902        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2903        let package =
2904            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2905
2906        // default_report uses the same st2067_21 registry as validate() uses internally.
2907        let default_report = package.validate(&ValidationOptions::default());
2908
2909        // Build the same registry that validate() uses so counts are comparable.
2910        let registry = ConfigurableValidatorRegistry::new(ValidatorSelection::default());
2911        let injected_report = package.validate_package_structure_with_cpl_validator(
2912            |cpl| validate_cpl_with_registry(cpl, &registry),
2913            false,
2914        );
2915
2916        assert_eq!(
2917            default_report.total_issues(),
2918            injected_report.total_issues()
2919        );
2920        assert_eq!(default_report.errors.len(), injected_report.errors.len());
2921        assert_eq!(
2922            default_report.warnings.len(),
2923            injected_report.warnings.len()
2924        );
2925        assert_eq!(
2926            default_report.critical.len(),
2927            injected_report.critical.len()
2928        );
2929        assert_eq!(default_report.info.len(), injected_report.info.len());
2930    }
2931
2932    #[test]
2933    fn test_package_with_missing_files() {
2934        let test_path = test_data("MissingFilesAndAssetMapEntries");
2935
2936        match Imferno::parse(read_dir(test_path).unwrap()) {
2937            Ok(package) => {
2938                let validation_fails = package.validate_structure().is_err();
2939                let structure_report = package.validate(&ValidationOptions::default());
2940                assert!(validation_fails || structure_report.has_errors());
2941            }
2942            Err(_) => {
2943                // Expected
2944            }
2945        }
2946    }
2947
2948    #[test]
2949    fn test_package_with_id_mismatch() {
2950        let test_path = test_data("MERIDIAN_Netflix_Photon_161006_ID_MISMATCH");
2951
2952        if let Ok(package) = Imferno::parse(read_dir(test_path).unwrap()) {
2953            assert!(!package.composition_playlists.is_empty());
2954        }
2955    }
2956
2957    #[test]
2958    fn test_lenient_parsing() {
2959        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2960
2961        let package = Imferno::parse(read_dir(&test_path).unwrap_or_default())
2962            .expect("Failed to parse package");
2963
2964        assert_eq!(package.composition_playlists.len(), 1);
2965    }
2966
2967    #[test]
2968    fn test_error_handling_invalid_path() {
2969        let invalid_path = "/nonexistent/path/to/package";
2970
2971        let result = Imferno::parse(read_dir(invalid_path).unwrap_or_default());
2972        // With an empty file map, ASSETMAP.xml will be missing → parse error
2973        assert!(result.is_err());
2974    }
2975
2976    #[test]
2977    fn test_get_asset_path() {
2978        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2979        let package =
2980            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2981
2982        if let Some(first_asset) = package.asset_map.asset_list.assets.first() {
2983            let asset_path = package.get_asset_path(first_asset.id);
2984            assert!(asset_path.is_some());
2985        }
2986
2987        // Test with invalid asset ID
2988        assert!(package.get_asset_path_str("invalid-id").is_none());
2989    }
2990
2991    #[test]
2992    fn test_validation_errors() {
2993        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
2994        let package =
2995            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
2996
2997        let report = package.validate(&meridian_test_options());
2998        assert!(
2999            !report.has_errors(),
3000            "Validation should pass: {:?}",
3001            report.summary()
3002        );
3003    }
3004
3005    #[test]
3006    fn test_get_cpl_with_invalid_uuid() {
3007        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3008        let package =
3009            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3010
3011        assert!(package.get_cpl_str("invalid-uuid").is_none());
3012
3013        let uuid = "0eb3d1b9-b77b-4d3f-bbe5-7c69b15dca85";
3014        let result = package.get_cpl_str(uuid);
3015        assert!(result.is_some());
3016    }
3017
3018    #[test]
3019    fn test_empty_package_edge_cases() {
3020        let test_path = test_data("MissingFilesAndAssetMapEntries");
3021
3022        if let Ok(package) = Imferno::parse(read_dir(test_path).unwrap()) {
3023            assert!(package.composition_playlists.is_empty());
3024            assert!(package.get_main_cpl().is_none());
3025            assert!(package.analyze_tracks().is_empty());
3026        }
3027    }
3028
3029    #[test]
3030    fn test_bad_xml_package() {
3031        match Imferno::parse(read_dir(test_data("BadXML")).unwrap_or_default()) {
3032            Ok(_) => {}
3033            Err(err) => {
3034                assert!(
3035                    err.to_string().contains("parsing")
3036                        || err.to_string().contains("XML")
3037                        || err.to_string().contains("Invalid")
3038                        || err.to_string().contains("Missing")
3039                );
3040            }
3041        }
3042    }
3043
3044    #[test]
3045    fn test_wrong_mime_types_package() {
3046        let test_path = test_data("WrongXmlMimeTypes");
3047
3048        if let Ok(package) = Imferno::parse(read_dir(test_path).unwrap_or_default()) {
3049            assert!(!package.asset_map.asset_list.assets.is_empty());
3050        }
3051    }
3052
3053    #[test]
3054    fn test_cpl_edge_cases() {
3055        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3056        let package =
3057            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3058
3059        assert!(!package.composition_playlists.is_empty());
3060
3061        let first_cpl = package.composition_playlists.values().next().unwrap();
3062        let details = package.get_cpl_details(&first_cpl.id.to_string()).unwrap();
3063        assert_eq!(details.title, first_cpl.content_title.text);
3064
3065        for version in &details.content_versions {
3066            assert!(!version.is_empty());
3067        }
3068    }
3069
3070    #[test]
3071    fn test_directory_structure_validation() {
3072        let current_dir = std::env::current_dir().unwrap();
3073        let result = Imferno::parse(read_dir(&current_dir).unwrap_or_default());
3074        assert!(result.is_err());
3075
3076        let fake_dir = "/this/path/does/not/exist";
3077        let result = Imferno::parse(read_dir(fake_dir).unwrap_or_default());
3078        assert!(result.is_err());
3079
3080        let file_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../Cargo.toml");
3081        let result = Imferno::parse(read_dir(file_path).unwrap_or_default());
3082        assert!(result.is_err());
3083    }
3084
3085    #[test]
3086    fn test_serialization() {
3087        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3088        let package =
3089            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3090
3091        let tracks = package.analyze_tracks();
3092        let json = serde_json::to_string(&tracks).expect("Failed to serialize tracks");
3093        assert!(json.contains("total_tracks") || json == "[]");
3094    }
3095
3096    #[test]
3097    fn test_concurrent_access() {
3098        use std::sync::Arc;
3099        use std::thread;
3100
3101        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3102        let package = Arc::new(
3103            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package"),
3104        );
3105
3106        let mut handles = vec![];
3107
3108        for _ in 0..4 {
3109            let pkg = package.clone();
3110            let handle = thread::spawn(move || {
3111                assert!(!pkg.asset_map.asset_list.assets.is_empty());
3112                assert!(!pkg.composition_playlists.is_empty());
3113                let _ = pkg.analyze_tracks();
3114            });
3115            handles.push(handle);
3116        }
3117
3118        for handle in handles {
3119            handle.join().expect("Thread failed");
3120        }
3121    }
3122
3123    #[test]
3124    fn test_malformed_xml_handling() {
3125        use std::fs;
3126        use tempfile::TempDir;
3127
3128        let temp_dir = TempDir::new().expect("Failed to create temp dir");
3129        let temp_path = temp_dir.path();
3130
3131        let volindex_content = r#"<?xml version="1.0" encoding="UTF-8"?>
3132<VolumeIndex xmlns="http://www.smpte-ra.org/schemas/2067-2/2016/volindex">
3133  <Index>1</Index>
3134</VolumeIndex>"#;
3135        fs::write(temp_path.join("VOLINDEX.xml"), volindex_content)
3136            .expect("Failed to write VOLINDEX");
3137
3138        let malformed_assetmap = r#"<?xml version="1.0" encoding="UTF-8"?>
3139<AssetMap xmlns="http://www.smpte-ra.org/schemas/2067-2/2016/assetmap">
3140  <Id>urn:uuid:invalid-xml</Id>
3141  <!-- Missing closing tag -->
3142  <AssetList>
3143    <Asset>
3144      <Id>test-asset</Id>
3145"#;
3146        fs::write(temp_path.join("ASSETMAP.xml"), malformed_assetmap)
3147            .expect("Failed to write malformed ASSETMAP");
3148
3149        let result = Imferno::parse(read_dir(temp_path).unwrap());
3150        assert!(result.is_err(), "Should fail with malformed XML");
3151    }
3152
3153    #[test]
3154    fn test_validation_with_complex_structure() {
3155        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3156        let package =
3157            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3158
3159        let report = package.validate(&meridian_test_options());
3160        assert!(
3161            !report.has_errors(),
3162            "Package should be valid: {:?}",
3163            report.summary()
3164        );
3165    }
3166
3167    #[test]
3168    fn test_package_with_no_cpls() {
3169        use std::fs;
3170        use tempfile::TempDir;
3171
3172        let temp_dir = TempDir::new().expect("Failed to create temp dir");
3173        let temp_path = temp_dir.path();
3174
3175        let volindex_content = r#"<?xml version="1.0" encoding="UTF-8"?>
3176<VolumeIndex xmlns="http://www.smpte-ra.org/schemas/2067-2/2016/volindex">
3177  <Index>1</Index>
3178</VolumeIndex>"#;
3179        fs::write(temp_path.join("VOLINDEX.xml"), volindex_content)
3180            .expect("Failed to write VOLINDEX");
3181
3182        let no_cpl_assetmap = r#"<?xml version="1.0" encoding="UTF-8"?>
3183<AssetMap xmlns="http://www.smpte-ra.org/schemas/2067-2/2016/assetmap">
3184  <Id>urn:uuid:12345678-1234-1234-1234-123456789012</Id>
3185  <VolumeCount>1</VolumeCount>
3186  <IssueDate>2023-01-01T00:00:00</IssueDate>
3187  <AssetList>
3188    <Asset>
3189      <Id>urn:uuid:aabbccdd-1122-3344-5566-778899aabbcc</Id>
3190      <ChunkList>
3191        <Chunk>
3192          <Path>video.mxf</Path>
3193        </Chunk>
3194      </ChunkList>
3195    </Asset>
3196  </AssetList>
3197</AssetMap>"#;
3198        fs::write(temp_path.join("ASSETMAP.xml"), no_cpl_assetmap)
3199            .expect("Failed to write ASSETMAP");
3200
3201        let result = Imferno::parse(read_dir(temp_path).unwrap());
3202        assert!(
3203            result.is_ok(),
3204            "Package with no CPLs should parse successfully"
3205        );
3206
3207        let package = result.unwrap();
3208        assert!(package.composition_playlists.is_empty());
3209        assert!(package.get_main_cpl().is_none());
3210        assert!(package.analyze_tracks().is_empty());
3211    }
3212
3213    #[test]
3214    fn test_asset_path_resolution() {
3215        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3216        let package =
3217            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3218
3219        for asset in &package.asset_map.asset_list.assets {
3220            let resolved_path = package.get_asset_path(asset.id);
3221            assert!(
3222                resolved_path.is_some(),
3223                "Should resolve path for asset {}",
3224                asset.id
3225            );
3226
3227            let path = resolved_path.unwrap();
3228            assert!(path.is_absolute(), "Resolved path should be absolute");
3229            assert!(
3230                path.starts_with(&package.root_path),
3231                "Path should be within package directory"
3232            );
3233        }
3234
3235        assert!(package.get_asset_path_str("invalid-id").is_none());
3236    }
3237
3238    #[test]
3239    fn test_boundary_conditions() {
3240        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3241        let package =
3242            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3243
3244        assert!(package.get_cpl_details("").is_none());
3245        assert!(package.get_cpl_details("   ").is_none());
3246        assert!(package.get_cpl_details("not-a-uuid").is_none());
3247
3248        assert!(package.get_asset_path_str("").is_none());
3249        assert!(package.get_asset_path_str("   ").is_none());
3250        assert!(package.get_asset_path_str("invalid-asset-id").is_none());
3251    }
3252
3253    #[test]
3254    fn test_large_package_handling() {
3255        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3256        let package =
3257            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3258
3259        let cpl_count = package.composition_playlists.len();
3260        for _ in 0..10 {
3261            assert!(!package.asset_map.asset_list.assets.is_empty());
3262            assert_eq!(package.analyze_tracks().len(), cpl_count);
3263        }
3264    }
3265
3266    #[test]
3267    fn test_validate_file_manifest_detects_mxf_files() {
3268        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3269        let package =
3270            Imferno::parse(read_dir(test_path).unwrap()).expect("Failed to parse package");
3271
3272        let errors = package.validate_file_manifest();
3273
3274        for err in &errors {
3275            assert!(
3276                !matches!(err, FileValidationError::Missing { .. }),
3277                "Unexpected missing file: {}",
3278                err
3279            );
3280        }
3281    }
3282
3283    #[test]
3284    fn test_validate_file_manifest_detects_missing_files() {
3285        use tempfile::TempDir;
3286
3287        let dir = TempDir::new().unwrap();
3288        let root = dir.path();
3289
3290        std::fs::write(root.join("VOLINDEX.xml"), r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#).unwrap();
3291
3292        let pkl_xml = r#"<?xml version="1.0"?><PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3293<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3294<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3295<AssetList>
3296  <Asset>
3297    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3298    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3299    <Size>999</Size>
3300    <Type>application/mxf</Type>
3301    <OriginalFileName>missing_file.mxf</OriginalFileName>
3302  </Asset>
3303</AssetList>
3304</PackingList>"#;
3305        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3306
3307        let assetmap_xml = r#"<?xml version="1.0"?><AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3308<Id>urn:uuid:cccccccc-0000-0000-0000-000000000003</Id>
3309<Creator>test</Creator>
3310<VolumeCount>1</VolumeCount>
3311<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3312<Issuer>test</Issuer>
3313<AssetList>
3314  <Asset>
3315    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3316    <PackingList>true</PackingList>
3317    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3318  </Asset>
3319  <Asset>
3320    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3321    <ChunkList><Chunk><Path>missing_file.mxf</Path></Chunk></ChunkList>
3322  </Asset>
3323</AssetList>
3324</AssetMap>"#;
3325        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3326
3327        let package = Imferno::parse(read_dir(root).unwrap()).expect("Failed to parse package");
3328        let errors = package.validate_file_manifest();
3329
3330        assert!(
3331            errors
3332                .iter()
3333                .any(|e| matches!(e, FileValidationError::Missing { .. })),
3334            "Expected a Missing error, got: {:?}",
3335            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3336        );
3337    }
3338
3339    // ── ST 2067-2 cross-reference validation ────────────────────────────────
3340
3341    /// SMPTE ST 2067-2 §7/9: PKL asset UUIDs must exist in the AssetMap.
3342    #[test]
3343    fn test_pkl_constraints_detects_missing_assetmap_entries() {
3344        use tempfile::TempDir;
3345
3346        let dir = TempDir::new().unwrap();
3347        let root = dir.path();
3348
3349        std::fs::write(root.join("VOLINDEX.xml"),
3350            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#).unwrap();
3351
3352        // PKL references an asset that is NOT in the AssetMap
3353        let pkl_xml = r#"<?xml version="1.0"?>
3354<PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3355<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3356<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3357<AssetList>
3358  <Asset>
3359    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3360    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3361    <Size>999</Size>
3362    <Type>application/mxf</Type>
3363    <OriginalFileName>some.mxf</OriginalFileName>
3364  </Asset>
3365  <Asset>
3366    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000099</Id>
3367    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3368    <Size>100</Size>
3369    <Type>application/mxf</Type>
3370    <OriginalFileName>orphan.mxf</OriginalFileName>
3371  </Asset>
3372</AssetList>
3373</PackingList>"#;
3374        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3375
3376        // AssetMap only knows about the PKL and one asset (bbbbbbbb), not cccccccc
3377        let assetmap_xml = r#"<?xml version="1.0"?>
3378<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3379<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3380<Creator>test</Creator>
3381<VolumeCount>1</VolumeCount>
3382<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3383<Issuer>test</Issuer>
3384<AssetList>
3385  <Asset>
3386    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3387    <PackingList>true</PackingList>
3388    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3389  </Asset>
3390  <Asset>
3391    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3392    <ChunkList><Chunk><Path>some.mxf</Path></Chunk></ChunkList>
3393  </Asset>
3394</AssetList>
3395</AssetMap>"#;
3396        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3397
3398        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3399        let errors = package.validate_pkl_constraints();
3400
3401        assert!(
3402            errors.iter().any(|e| matches!(e, FileValidationError::NotInAssetMap { uuid, .. } if uuid.contains("cccccccc"))),
3403            "Expected NotInAssetMap for cccccccc, got: {:?}",
3404            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3405        );
3406    }
3407
3408    /// SMPTE ST 2067-2 §7: CPL TrackFileId references must resolve in AssetMap.
3409    #[test]
3410    fn test_cpl_asset_reference_validation_on_meridian() {
3411        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3412        let package = Imferno::parse(read_dir(test_path).unwrap()).expect("parse");
3413
3414        // MERIDIAN package should have valid cross-references
3415        let report = package.validate(&meridian_test_options());
3416        assert!(
3417            !report.has_errors(),
3418            "MERIDIAN should be valid: {:?}",
3419            report.summary()
3420        );
3421    }
3422
3423    /// ST 2067-2 §9: PKL with an unrecognised namespace URI is flagged.
3424    /// Matches the published SMPTE PKL namespace whitelist.
3425    #[test]
3426    fn test_pkl_constraints_flags_unknown_namespace() {
3427        use tempfile::TempDir;
3428        let dir = TempDir::new().unwrap();
3429        let root = dir.path();
3430        std::fs::write(
3431            root.join("VOLINDEX.xml"),
3432            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#,
3433        )
3434        .unwrap();
3435        let pkl_xml = r#"<?xml version="1.0"?>
3436<PackingList xmlns="urn:not-a-real-pkl-namespace">
3437<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3438<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3439<AssetList>
3440  <Asset>
3441    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3442    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3443    <Size>999</Size>
3444    <Type>application/mxf</Type>
3445  </Asset>
3446</AssetList>
3447</PackingList>"#;
3448        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3449        let assetmap_xml = r#"<?xml version="1.0"?>
3450<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3451<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3452<Creator>test</Creator><VolumeCount>1</VolumeCount>
3453<IssueDate>2024-01-01T00:00:00Z</IssueDate><Issuer>test</Issuer>
3454<AssetList>
3455  <Asset>
3456    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3457    <PackingList>true</PackingList>
3458    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3459  </Asset>
3460  <Asset>
3461    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3462    <ChunkList><Chunk><Path>some.mxf</Path></Chunk></ChunkList>
3463  </Asset>
3464</AssetList></AssetMap>"#;
3465        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3466        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3467        let errors = package.validate_pkl_constraints();
3468        assert!(
3469            errors
3470                .iter()
3471                .any(|e| matches!(e, FileValidationError::UnknownPklNamespace { .. })),
3472            "Expected UnknownPklNamespace, got: {:?}",
3473            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3474        );
3475    }
3476
3477    /// ST 429-9 §6.3: AssetMap with no `<PackingList>true</PackingList>`
3478    /// asset must be flagged when a PKL document exists in the package.
3479    #[test]
3480    fn test_pkl_constraints_flags_assetmap_with_no_packinglist() {
3481        use tempfile::TempDir;
3482        let dir = TempDir::new().unwrap();
3483        let root = dir.path();
3484        std::fs::write(
3485            root.join("VOLINDEX.xml"),
3486            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#,
3487        )
3488        .unwrap();
3489        let pkl_xml = r#"<?xml version="1.0"?>
3490<PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3491<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3492<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3493<AssetList>
3494  <Asset>
3495    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3496    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3497    <Size>999</Size>
3498    <Type>application/mxf</Type>
3499  </Asset>
3500</AssetList>
3501</PackingList>"#;
3502        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3503        // AssetMap omits the PackingList flag on every asset.
3504        let assetmap_xml = r#"<?xml version="1.0"?>
3505<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3506<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3507<Creator>test</Creator><VolumeCount>1</VolumeCount>
3508<IssueDate>2024-01-01T00:00:00Z</IssueDate><Issuer>test</Issuer>
3509<AssetList>
3510  <Asset>
3511    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3512    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3513  </Asset>
3514  <Asset>
3515    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3516    <ChunkList><Chunk><Path>some.mxf</Path></Chunk></ChunkList>
3517  </Asset>
3518</AssetList></AssetMap>"#;
3519        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3520        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3521        let errors = package.validate_pkl_constraints();
3522        assert!(
3523            errors
3524                .iter()
3525                .any(|e| matches!(e, FileValidationError::AssetMapHasNoPackingList)),
3526            "Expected AssetMapHasNoPackingList, got: {:?}",
3527            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3528        );
3529        // `PklIdNotInAssetMap` doesn't fire here because the package
3530        // pipeline only loads PKL files when the AssetMap flags them —
3531        // so `self.packing_lists` is empty in this scenario. That
3532        // check is exercised separately in
3533        // `test_pkl_constraints_flags_pkl_id_mismatch`.
3534    }
3535
3536    /// ST 429-9 §6.3: when a PKL is loaded (via a flagged AssetMap
3537    /// asset) but the PKL document's internal Id differs from the
3538    /// AssetMap asset Id, flag the mismatch. Constructs a fixture
3539    /// where the AssetMap declares one Id but the PKL XML carries a
3540    /// different one inside.
3541    #[test]
3542    fn test_pkl_constraints_flags_pkl_id_mismatch() {
3543        use tempfile::TempDir;
3544        let dir = TempDir::new().unwrap();
3545        let root = dir.path();
3546        std::fs::write(
3547            root.join("VOLINDEX.xml"),
3548            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#,
3549        )
3550        .unwrap();
3551        // PKL Id `99999999-...` is intentionally different from the
3552        // AssetMap asset Id `aaaaaaaa-...` that flags it.
3553        let pkl_xml = r#"<?xml version="1.0"?>
3554<PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3555<Id>urn:uuid:99999999-0000-0000-0000-000000000099</Id>
3556<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3557<AssetList>
3558  <Asset>
3559    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3560    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3561    <Size>999</Size>
3562    <Type>application/mxf</Type>
3563  </Asset>
3564</AssetList>
3565</PackingList>"#;
3566        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3567        let assetmap_xml = r#"<?xml version="1.0"?>
3568<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3569<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3570<Creator>test</Creator><VolumeCount>1</VolumeCount>
3571<IssueDate>2024-01-01T00:00:00Z</IssueDate><Issuer>test</Issuer>
3572<AssetList>
3573  <Asset>
3574    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3575    <PackingList>true</PackingList>
3576    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3577  </Asset>
3578  <Asset>
3579    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3580    <ChunkList><Chunk><Path>some.mxf</Path></Chunk></ChunkList>
3581  </Asset>
3582</AssetList></AssetMap>"#;
3583        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3584        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3585        let errors = package.validate_pkl_constraints();
3586        assert!(
3587            errors
3588                .iter()
3589                .any(|e| matches!(e, FileValidationError::PklIdNotInAssetMap { pkl_id } if pkl_id.contains("99999999"))),
3590            "Expected PklIdNotInAssetMap for 99999999, got: {:?}",
3591            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3592        );
3593    }
3594
3595    /// SMPTE ST 2067-2 §9: PKL constraints validation passes on well-formed MERIDIAN.
3596    #[test]
3597    fn test_pkl_constraints_pass_on_meridian() {
3598        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3599        let package = Imferno::parse(read_dir(test_path).unwrap()).expect("parse");
3600
3601        let errors = package.validate_pkl_constraints();
3602        assert!(
3603            errors.is_empty(),
3604            "MERIDIAN PKL constraints should pass, got: {:?}",
3605            errors.iter().map(|e| e.to_string()).collect::<Vec<_>>()
3606        );
3607    }
3608
3609    // ── Unified ValidationReport pipeline ────────────────────────────────
3610
3611    /// validate_package_structure produces a clean report for MERIDIAN.
3612    #[test]
3613    fn test_validate_package_structure_meridian() {
3614        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
3615        let package = Imferno::parse(read_dir(test_path).unwrap()).expect("parse");
3616
3617        let report = package.validate(&meridian_test_options());
3618        assert!(
3619            !report.has_critical(),
3620            "MERIDIAN should have no critical issues: {}",
3621            report.summary()
3622        );
3623        assert!(
3624            !report.has_errors(),
3625            "MERIDIAN should have no errors: {}",
3626            report.summary()
3627        );
3628    }
3629
3630    /// FileValidationError::NotInAssetMap converts to REF_UNRESOLVED_UUID.
3631    #[test]
3632    fn test_file_validation_error_to_issue_not_in_assetmap() {
3633        let err = FileValidationError::NotInAssetMap {
3634            uuid: "test-uuid".to_string(),
3635            original_file_name: Some("test.mxf".to_string()),
3636        };
3637        let issue = ValidationIssue::from(&err);
3638        assert_eq!(issue.severity, Severity::Error);
3639        assert_eq!(issue.category, Category::Reference);
3640        assert_eq!(issue.code, codes::St2067_2_2020::UnresolvedUuid.code());
3641        assert!(issue.message.contains("test-uuid"));
3642    }
3643
3644    /// FileValidationError::HashMismatch converts to Critical severity.
3645    #[test]
3646    fn test_file_validation_error_to_issue_hash_mismatch() {
3647        let err = FileValidationError::HashMismatch {
3648            uuid: "asset-123".to_string(),
3649            path: PathBuf::from("/tmp/test.mxf"),
3650            expected: "abc123".to_string(),
3651            actual: "def456".to_string(),
3652        };
3653        let issue = ValidationIssue::from(&err);
3654        assert_eq!(issue.severity, Severity::Critical);
3655        assert_eq!(issue.code, codes::St2067_2_2020::ChecksumMismatch.code());
3656        assert!(issue.suggestion.is_some());
3657    }
3658
3659    /// FileValidationError::Missing converts to ASSET_FILE_NOT_FOUND.
3660    #[test]
3661    fn test_file_validation_error_to_issue_missing() {
3662        let err = FileValidationError::Missing {
3663            uuid: "missing-uuid".to_string(),
3664            path: PathBuf::from("/tmp/missing.mxf"),
3665        };
3666        let issue = ValidationIssue::from(&err);
3667        assert_eq!(issue.severity, Severity::Error);
3668        assert_eq!(issue.category, Category::Asset);
3669        assert_eq!(issue.code, codes::St2067_2_2020::FileNotFound.code());
3670    }
3671
3672    /// validate_package_structure detects PKL→AssetMap orphans.
3673    #[test]
3674    fn test_validate_package_structure_detects_orphan_pkl_assets() {
3675        use tempfile::TempDir;
3676
3677        let dir = TempDir::new().unwrap();
3678        let root = dir.path();
3679
3680        std::fs::write(root.join("VOLINDEX.xml"),
3681            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#).unwrap();
3682
3683        // PKL references cccccccc which is NOT in AssetMap
3684        let pkl_xml = r#"<?xml version="1.0"?>
3685<PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3686<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3687<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3688<AssetList>
3689  <Asset>
3690    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000099</Id>
3691    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3692    <Size>100</Size>
3693    <Type>application/mxf</Type>
3694    <OriginalFileName>orphan.mxf</OriginalFileName>
3695  </Asset>
3696</AssetList>
3697</PackingList>"#;
3698        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3699
3700        let assetmap_xml = r#"<?xml version="1.0"?>
3701<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3702<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3703<Creator>test</Creator>
3704<VolumeCount>1</VolumeCount>
3705<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3706<Issuer>test</Issuer>
3707<AssetList>
3708  <Asset>
3709    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3710    <PackingList>true</PackingList>
3711    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3712  </Asset>
3713</AssetList>
3714</AssetMap>"#;
3715        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3716
3717        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3718        let report = package.validate(&ValidationOptions::default());
3719
3720        assert!(
3721            report.has_errors(),
3722            "Should report errors for orphan PKL asset: {}",
3723            report.summary()
3724        );
3725        // Should have at least the NotInAssetMap error
3726        let all_issues: Vec<_> = report
3727            .errors
3728            .iter()
3729            .filter(|i| i.code == codes::St2067_2_2020::UnresolvedUuid.code())
3730            .collect();
3731        assert!(
3732            !all_issues.is_empty(),
3733            "Should have UnresolvedUuid for orphan PKL asset"
3734        );
3735    }
3736
3737    /// validate_package_structure detects missing files on disk.
3738    #[test]
3739    fn test_validate_package_structure_detects_missing_files() {
3740        use tempfile::TempDir;
3741
3742        let dir = TempDir::new().unwrap();
3743        let root = dir.path();
3744
3745        std::fs::write(root.join("VOLINDEX.xml"),
3746            r#"<?xml version="1.0"?><VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM"><Index>1</Index></VolumeIndex>"#).unwrap();
3747
3748        let pkl_xml = r#"<?xml version="1.0"?>
3749<PackingList xmlns="http://www.smpte-ra.org/schemas/429-8/2007/PKL">
3750<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3751<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3752<AssetList>
3753  <Asset>
3754    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3755    <Hash>2jmj7l5rSw0yVb/vlWAYkK/YBwk=</Hash>
3756    <Size>999</Size>
3757    <Type>application/mxf</Type>
3758    <OriginalFileName>ghost.mxf</OriginalFileName>
3759  </Asset>
3760</AssetList>
3761</PackingList>"#;
3762        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3763
3764        let assetmap_xml = r#"<?xml version="1.0"?>
3765<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3766<Id>urn:uuid:dddddddd-0000-0000-0000-000000000004</Id>
3767<Creator>test</Creator>
3768<VolumeCount>1</VolumeCount>
3769<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3770<Issuer>test</Issuer>
3771<AssetList>
3772  <Asset>
3773    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3774    <PackingList>true</PackingList>
3775    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3776  </Asset>
3777  <Asset>
3778    <Id>urn:uuid:bbbbbbbb-0000-0000-0000-000000000002</Id>
3779    <ChunkList><Chunk><Path>ghost.mxf</Path></Chunk></ChunkList>
3780  </Asset>
3781</AssetList>
3782</AssetMap>"#;
3783        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3784        // Note: ghost.mxf is NOT created on disk
3785
3786        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3787        let report = package.validate(&ValidationOptions::default());
3788
3789        assert!(
3790            report.has_errors(),
3791            "Should report errors for missing file: {}",
3792            report.summary()
3793        );
3794        let missing_issues: Vec<_> = report
3795            .errors
3796            .iter()
3797            .filter(|i| i.code == codes::St2067_2_2020::FileNotFound.code())
3798            .collect();
3799        assert!(
3800            !missing_issues.is_empty(),
3801            "Should have FileNotFound for ghost.mxf"
3802        );
3803    }
3804
3805    // ── parse_ul_bytes ──────────────────────────────────────────────────────
3806
3807    #[test]
3808    fn parse_ul_bytes_valid() {
3809        let bytes = parse_ul_bytes("urn:smpte:ul:060e2b34.04010102.0d010201.01010900");
3810        assert!(bytes.is_some());
3811        let b = bytes.unwrap();
3812        assert_eq!(b[0], 0x06);
3813        assert_eq!(b[12], 0x01);
3814        assert_eq!(b[13], 0x01); // OP1a
3815        assert_eq!(b[14], 0x09);
3816    }
3817
3818    #[test]
3819    fn parse_ul_bytes_invalid() {
3820        assert!(parse_ul_bytes("not-a-ul").is_none());
3821        assert!(parse_ul_bytes("urn:smpte:ul:060e2b34").is_none());
3822    }
3823
3824    // ── MXF header cross-validation ─────────────────────────────────────────
3825
3826    /// Build a minimal MXF byte stream with the given Operational Pattern UL.
3827    fn make_mxf_bytes(op_ul: [u8; 16]) -> Vec<u8> {
3828        let mut stream = Vec::new();
3829        // Key: Header Partition Pack (Closed and Complete)
3830        stream.extend_from_slice(&[
3831            0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02,
3832            0x04, 0x00,
3833        ]);
3834        // BER length = 88
3835        stream.push(88);
3836        // MajorVersion = 1, MinorVersion = 3
3837        stream.extend_from_slice(&[0x00, 0x01, 0x00, 0x03]);
3838        // KAGSize = 512
3839        stream.extend_from_slice(&[0x00, 0x00, 0x02, 0x00]);
3840        // ThisPartition through BodySID (56 bytes of zeros)
3841        stream.extend_from_slice(&[0u8; 56]);
3842        // OperationalPattern UL
3843        stream.extend_from_slice(&op_ul);
3844        // EssenceContainers batch: count=0, element_size=16
3845        stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]);
3846        stream.extend_from_slice(&[0x00, 0x00, 0x00, 0x10]);
3847        stream
3848    }
3849
3850    #[test]
3851    fn mxf_validation_accepts_op1a() {
3852        let root = tempfile::tempdir().unwrap();
3853        let root = root.path();
3854
3855        // OP1a UL
3856        let op1a: [u8; 16] = [
3857            0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x02, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x01,
3858            0x09, 0x00,
3859        ];
3860        std::fs::write(root.join("video.mxf"), make_mxf_bytes(op1a)).unwrap();
3861
3862        // Minimal PKL + AssetMap
3863        let pkl_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
3864<PackingList xmlns="http://www.smpte-ra.org/ns/2067-2/2020">
3865<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3866<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3867<AssetList>
3868  <Asset>
3869    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
3870    <Hash>AAAAAAAAAAAAAAAAAAAAAAAAAAA=</Hash>
3871    <Size>105</Size>
3872    <Type>application/mxf</Type>
3873    <OriginalFileName>video.mxf</OriginalFileName>
3874  </Asset>
3875</AssetList>
3876</PackingList>"#;
3877        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3878
3879        let assetmap_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
3880<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3881<Id>urn:uuid:dddddddd-0000-0000-0000-000000000001</Id>
3882<Creator>test</Creator>
3883<VolumeCount>1</VolumeCount>
3884<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3885<Issuer>test</Issuer>
3886<AssetList>
3887  <Asset>
3888    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3889    <PackingList>true</PackingList>
3890    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3891  </Asset>
3892  <Asset>
3893    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
3894    <ChunkList><Chunk><Path>video.mxf</Path></Chunk></ChunkList>
3895  </Asset>
3896</AssetList>
3897</AssetMap>"#
3898            .to_string();
3899        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3900
3901        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3902        let report = package.validate(&ValidationOptions::default());
3903
3904        let op_issues: Vec<_> = report
3905            .critical
3906            .iter()
3907            .chain(report.errors.iter())
3908            .chain(report.warnings.iter())
3909            .chain(report.info.iter())
3910            .filter(|i| i.code == St377_1_2011::Op1a.code())
3911            .collect();
3912        assert!(
3913            op_issues.is_empty(),
3914            "OP1a should not produce OP issues: {:#?}",
3915            op_issues,
3916        );
3917    }
3918
3919    #[test]
3920    fn mxf_validation_flags_non_op1a() {
3921        let root = tempfile::tempdir().unwrap();
3922        let root = root.path();
3923
3924        // OP-Atom UL: bytes 13-14 = 03 01 (not OP1a's 01 01)
3925        let op_atom: [u8; 16] = [
3926            0x06, 0x0E, 0x2B, 0x34, 0x04, 0x01, 0x01, 0x02, 0x0D, 0x01, 0x02, 0x01, 0x03, 0x01,
3927            0x00, 0x00,
3928        ];
3929        std::fs::write(root.join("video.mxf"), make_mxf_bytes(op_atom)).unwrap();
3930
3931        let pkl_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
3932<PackingList xmlns="http://www.smpte-ra.org/ns/2067-2/2020">
3933<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3934<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3935<AssetList>
3936  <Asset>
3937    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
3938    <Hash>AAAAAAAAAAAAAAAAAAAAAAAAAAA=</Hash>
3939    <Size>105</Size>
3940    <Type>application/mxf</Type>
3941    <OriginalFileName>video.mxf</OriginalFileName>
3942  </Asset>
3943</AssetList>
3944</PackingList>"#;
3945        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
3946
3947        let assetmap_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
3948<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
3949<Id>urn:uuid:dddddddd-0000-0000-0000-000000000001</Id>
3950<Creator>test</Creator>
3951<VolumeCount>1</VolumeCount>
3952<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3953<Issuer>test</Issuer>
3954<AssetList>
3955  <Asset>
3956    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3957    <PackingList>true</PackingList>
3958    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
3959  </Asset>
3960  <Asset>
3961    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
3962    <ChunkList><Chunk><Path>video.mxf</Path></Chunk></ChunkList>
3963  </Asset>
3964</AssetList>
3965</AssetMap>"#;
3966        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
3967
3968        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
3969        let report = package.validate(&ValidationOptions::default());
3970
3971        let op_issues: Vec<_> = report
3972            .critical
3973            .iter()
3974            .chain(report.errors.iter())
3975            .chain(report.warnings.iter())
3976            .chain(report.info.iter())
3977            .filter(|i| i.code == St377_1_2011::Op1a.code())
3978            .collect();
3979        assert_eq!(
3980            op_issues.len(),
3981            1,
3982            "Non-OP1a should produce exactly one OP issue: {:#?}",
3983            op_issues,
3984        );
3985    }
3986
3987    #[test]
3988    fn mxf_validation_warns_invalid_mxf() {
3989        let root = tempfile::tempdir().unwrap();
3990        let root = root.path();
3991
3992        // Write garbage data as an MXF file
3993        std::fs::write(root.join("bad.mxf"), b"not-an-mxf-file-at-all-garbage").unwrap();
3994
3995        let pkl_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
3996<PackingList xmlns="http://www.smpte-ra.org/ns/2067-2/2020">
3997<Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
3998<IssueDate>2024-01-01T00:00:00Z</IssueDate>
3999<AssetList>
4000  <Asset>
4001    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
4002    <Hash>AAAAAAAAAAAAAAAAAAAAAAAAAAA=</Hash>
4003    <Size>30</Size>
4004    <Type>application/mxf</Type>
4005    <OriginalFileName>bad.mxf</OriginalFileName>
4006  </Asset>
4007</AssetList>
4008</PackingList>"#;
4009        std::fs::write(root.join("PKL.xml"), pkl_xml).unwrap();
4010
4011        let assetmap_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
4012<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
4013<Id>urn:uuid:dddddddd-0000-0000-0000-000000000001</Id>
4014<Creator>test</Creator>
4015<VolumeCount>1</VolumeCount>
4016<IssueDate>2024-01-01T00:00:00Z</IssueDate>
4017<Issuer>test</Issuer>
4018<AssetList>
4019  <Asset>
4020    <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
4021    <PackingList>true</PackingList>
4022    <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
4023  </Asset>
4024  <Asset>
4025    <Id>urn:uuid:cccccccc-0000-0000-0000-000000000001</Id>
4026    <ChunkList><Chunk><Path>bad.mxf</Path></Chunk></ChunkList>
4027  </Asset>
4028</AssetList>
4029</AssetMap>"#;
4030        std::fs::write(root.join("ASSETMAP.xml"), assetmap_xml).unwrap();
4031
4032        let package = Imferno::parse(read_dir(root).unwrap()).expect("parse");
4033        let report = package.validate(&ValidationOptions::default());
4034
4035        let notmxf_issues: Vec<_> = report
4036            .critical
4037            .iter()
4038            .chain(report.errors.iter())
4039            .chain(report.warnings.iter())
4040            .chain(report.info.iter())
4041            .filter(|i| i.code == St377_1_2011::NotMxf.code())
4042            .collect();
4043        assert!(
4044            !notmxf_issues.is_empty(),
4045            "Invalid MXF should produce ST377-1-NotMxf warning: {:#?}",
4046            report.warnings,
4047        );
4048    }
4049
4050    // ═════════════════════════════════════════════════════════════════════════
4051    // Normative-claim gap closure: From<&FileValidationError> remaining variants
4052    // ═════════════════════════════════════════════════════════════════════════
4053
4054    /// FileValidationError::SizeMismatch converts to ASSET-005.
4055    #[test]
4056    fn test_file_validation_error_to_issue_size_mismatch() {
4057        let err = FileValidationError::SizeMismatch {
4058            uuid: "size-uuid".to_string(),
4059            path: PathBuf::from("/tmp/test.mxf"),
4060            expected: 1000,
4061            actual: 2000,
4062        };
4063        let issue = ValidationIssue::from(&err);
4064        assert_eq!(issue.severity, Severity::Error);
4065        assert_eq!(issue.category, Category::Asset);
4066        assert_eq!(issue.code, St2067_2_2020::SizeMismatch.code());
4067        assert!(issue.message.contains("1000"));
4068        assert!(issue.message.contains("2000"));
4069    }
4070
4071    /// FileValidationError::Io converts to ASSET-006.
4072    #[test]
4073    fn test_file_validation_error_to_issue_io() {
4074        let err = FileValidationError::Io {
4075            uuid: "io-uuid".to_string(),
4076            path: PathBuf::from("/tmp/broken.mxf"),
4077            message: "permission denied".to_string(),
4078        };
4079        let issue = ValidationIssue::from(&err);
4080        assert_eq!(issue.severity, Severity::Error);
4081        assert_eq!(issue.category, Category::Asset);
4082        assert_eq!(issue.code, "IMF:General/IoError");
4083        assert!(issue.message.contains("permission denied"));
4084    }
4085
4086    /// FileValidationError::DuplicatePklAssetId converts to REF_DUPLICATE_UUID.
4087    #[test]
4088    fn test_file_validation_error_to_issue_duplicate_pkl_asset_id() {
4089        let err = FileValidationError::DuplicatePklAssetId {
4090            uuid: "dup-uuid".to_string(),
4091            pkl_id: "pkl-001".to_string(),
4092        };
4093        let issue = ValidationIssue::from(&err);
4094        assert_eq!(issue.severity, Severity::Error);
4095        assert_eq!(issue.category, Category::Reference);
4096        assert_eq!(issue.code, codes::St2067_2_2020::DuplicateUuid.code());
4097        assert!(issue.message.contains("dup-uuid"));
4098        assert!(issue.message.contains("pkl-001"));
4099    }
4100
4101    // ═════════════════════════════════════════════════════════════════════════
4102    // Normative-claim gap closure: validate_multi_pkl_consistency
4103    // ═════════════════════════════════════════════════════════════════════════
4104
4105    /// validate_package_structure on single-PKL fixture should not emit cross-PKL issues.
4106    #[test]
4107    fn test_multi_pkl_single_pkl_no_cross_pkl_issues() {
4108        let fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
4109            .parent()
4110            .unwrap()
4111            .parent()
4112            .unwrap()
4113            .join("fixture");
4114        if !fixture_path.exists() {
4115            eprintln!("skipping: fixture/ not present");
4116            return;
4117        }
4118        let package = Imferno::parse(read_dir(fixture_path).unwrap()).expect("parse fixture");
4119        let report = package.validate(&ValidationOptions::default());
4120        assert!(
4121            !report
4122                .errors
4123                .iter()
4124                .any(|i| i.code.contains("ChecksumMismatch")
4125                    || i.code == St2067_2_2020::SizeMismatch.code()),
4126            "Single-PKL package should have no multi-PKL consistency issues: {:#?}",
4127            report.errors,
4128        );
4129    }
4130
4131    // ═════════════════════════════════════════════════════════════════════════
4132    // Normative-claim gap closure: validate_segment_durations (positive path)
4133    // ═════════════════════════════════════════════════════════════════════════
4134
4135    /// Segment duration validation on fixture should pass (tracks have matching durations).
4136    #[test]
4137    fn test_segment_durations_fixture_pass() {
4138        let fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
4139            .parent()
4140            .unwrap()
4141            .parent()
4142            .unwrap()
4143            .join("fixture");
4144        if !fixture_path.exists() {
4145            eprintln!("skipping: fixture/ not present");
4146            return;
4147        }
4148        let package = Imferno::parse(read_dir(fixture_path).unwrap()).expect("parse fixture");
4149        let report = package.validate(&ValidationOptions::default());
4150        let duration_issues: Vec<_> = report
4151            .errors
4152            .iter()
4153            .filter(|i| i.code.contains("SegmentDuration"))
4154            .collect();
4155        assert!(
4156            duration_issues.is_empty(),
4157            "Fixture should have matching segment durations: {:#?}",
4158            duration_issues,
4159        );
4160    }
4161
4162    /// Regression guard: emitted package validation codes should not use :General fallback.
4163    #[test]
4164    fn test_emitted_codes_do_not_use_general_fallback() {
4165        let fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
4166            .parent()
4167            .unwrap()
4168            .parent()
4169            .unwrap()
4170            .join("fixture");
4171        if !fixture_path.exists() {
4172            eprintln!("skipping: fixture/ not present");
4173            return;
4174        }
4175        let package = Imferno::parse(read_dir(fixture_path).unwrap()).expect("parse fixture");
4176        let report = package.validate(&ValidationOptions::default());
4177
4178        let all_issues: Vec<_> = report
4179            .critical
4180            .iter()
4181            .chain(report.errors.iter())
4182            .chain(report.warnings.iter())
4183            .chain(report.info.iter())
4184            .collect();
4185
4186        assert!(
4187            !all_issues.iter().any(|i| i.code.contains(":General/")),
4188            "Package validator emitted :General fallback codes: {:#?}",
4189            all_issues,
4190        );
4191    }
4192
4193    // ═════════════════════════════════════════════════════════════════════════
4194    // ST 429-9 — VolindexMissing and MalformedXml
4195    // ═════════════════════════════════════════════════════════════════════════
4196
    /// Minimal AssetMap with a PackingList-flagged asset. ST 429-9 §6.3
    /// requires every AssetMap to identify at least one PKL via
    /// `<PackingList>true</PackingList>` — fixtures that don't include
    /// one trip the AssetMapHasNoPackingList check.
    ///
    /// Besides the `PKL.xml` entry, the list carries a second asset that
    /// points at `dummy.mxf`. The tests using this constant supply neither
    /// file; they only insert the AssetMap (and optionally a VOLINDEX) into
    /// the in-memory file map.
    const MINIMAL_ASSETMAP: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
  <Id>urn:uuid:dddddddd-0000-0000-0000-000000000001</Id>
  <Creator>test</Creator>
  <VolumeCount>1</VolumeCount>
  <IssueDate>2024-01-01T00:00:00Z</IssueDate>
  <Issuer>test</Issuer>
  <AssetList>
    <Asset>
      <Id>urn:uuid:aaaaaaaa-0000-0000-0000-000000000001</Id>
      <PackingList>true</PackingList>
      <ChunkList><Chunk><Path>PKL.xml</Path></Chunk></ChunkList>
    </Asset>
    <Asset>
      <Id>urn:uuid:eeeeeeee-0000-0000-0000-000000000001</Id>
      <ChunkList><Chunk><Path>dummy.mxf</Path></Chunk></ChunkList>
    </Asset>
  </AssetList>
</AssetMap>"#;
4220
    /// Well-formed VOLINDEX document in the ST 429-9 AssetMap namespace,
    /// declaring volume index 1. Used as the positive counterpart to the
    /// missing and malformed VOLINDEX cases.
    const VALID_VOLINDEX: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<VolumeIndex xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
  <Index>1</Index>
</VolumeIndex>"#;
4225
4226    /// ST 429-9 §7: absent VOLINDEX.xml emits VolindexMissing (Info severity).
4227    #[test]
4228    fn volindex_missing_emits_info() {
4229        let mut files = HashMap::new();
4230        files.insert("ASSETMAP.xml".to_string(), MINIMAL_ASSETMAP.to_string());
4231
4232        let pkg = Imferno::parse(files).expect("parse");
4233        let report = pkg.validate(&ValidationOptions::default());
4234
4235        let all: Vec<_> = report.info.iter().collect();
4236        assert!(
4237            all.iter().any(|i| i.code.contains("VolindexMissing")),
4238            "expected VolindexMissing info, got: {all:?}",
4239        );
4240    }
4241
4242    /// ST 429-9 §7: malformed VOLINDEX.xml emits MalformedXml (Error severity).
4243    #[test]
4244    fn volindex_malformed_emits_error() {
4245        let mut files = HashMap::new();
4246        files.insert("ASSETMAP.xml".to_string(), MINIMAL_ASSETMAP.to_string());
4247        files.insert(
4248            "VOLINDEX.xml".to_string(),
4249            "not xml <<< garbage".to_string(),
4250        );
4251
4252        let pkg = Imferno::parse(files).expect("parse");
4253        let report = pkg.validate(&ValidationOptions::default());
4254
4255        assert!(
4256            report
4257                .errors
4258                .iter()
4259                .any(|i| i.code.contains("MalformedXml")),
4260            "expected MalformedXml error, got: {:?}",
4261            report.errors,
4262        );
4263    }
4264
4265    /// ST 429-9 §7: valid VOLINDEX.xml produces no VOLINDEX diagnostic.
4266    #[test]
4267    fn volindex_valid_no_issue() {
4268        let mut files = HashMap::new();
4269        files.insert("ASSETMAP.xml".to_string(), MINIMAL_ASSETMAP.to_string());
4270        files.insert("VOLINDEX.xml".to_string(), VALID_VOLINDEX.to_string());
4271
4272        let pkg = Imferno::parse(files).expect("parse");
4273        let report = pkg.validate(&ValidationOptions::default());
4274
4275        let all: Vec<_> = report
4276            .critical
4277            .iter()
4278            .chain(report.errors.iter())
4279            .chain(report.warnings.iter())
4280            .chain(report.info.iter())
4281            .filter(|i| i.code.contains("ST429-9"))
4282            .collect();
4283        assert!(
4284            all.is_empty(),
4285            "expected no ST 429-9 diagnostics for valid VOLINDEX, got: {all:?}",
4286        );
4287    }
4288
4289    // ── sanitize_asset_path tests ─────────────────────────────────────────
4290
4291    #[test]
4292    fn sanitize_simple_relative_path() {
4293        let root = std::env::temp_dir();
4294        assert!(sanitize_asset_path(&root, "video.mxf").is_some());
4295    }
4296
4297    #[test]
4298    fn sanitize_nested_relative_path() {
4299        let root = std::env::temp_dir();
4300        assert!(sanitize_asset_path(&root, "subdir/video.mxf").is_some());
4301    }
4302
4303    #[test]
4304    fn sanitize_rejects_parent_dir_traversal() {
4305        let root = std::env::temp_dir();
4306        assert!(sanitize_asset_path(&root, "../escape.mxf").is_none());
4307    }
4308
4309    #[test]
4310    fn sanitize_rejects_deep_traversal() {
4311        let root = std::env::temp_dir();
4312        assert!(sanitize_asset_path(&root, "sub/../../escape.mxf").is_none());
4313    }
4314
4315    #[test]
4316    fn sanitize_rejects_absolute_path() {
4317        let root = std::env::temp_dir();
4318        assert!(sanitize_asset_path(&root, "/etc/passwd").is_none());
4319    }
4320
4321    #[test]
4322    fn sanitize_rejects_double_dot_prefix() {
4323        let root = std::env::temp_dir();
4324        assert!(sanitize_asset_path(&root, "../../etc/shadow").is_none());
4325    }
4326
4327    // ── parse_issues tests ────────────────────────────────────────────────
4328
4329    /// Minimal valid ASSETMAP XML template with placeholders for assets.
4330    fn minimal_assetmap(assets_xml: &str) -> String {
4331        format!(
4332            r#"<?xml version="1.0" encoding="UTF-8"?>
4333            <AssetMap xmlns="http://www.smpte-ra.org/schemas/429-9/2007/AM">
4334              <Id>urn:uuid:00000000-0000-0000-0000-000000000001</Id>
4335              <VolumeCount>1</VolumeCount>
4336              <IssueDate>2024-01-01T00:00:00+00:00</IssueDate>
4337              <Issuer>test</Issuer>
4338              <AssetList>{}</AssetList>
4339            </AssetMap>"#,
4340            assets_xml,
4341        )
4342    }
4343
4344    #[test]
4345    fn malformed_pkl_produces_parse_issue() {
4346        let mut files = HashMap::new();
4347        files.insert(
4348            "ASSETMAP.xml".to_string(),
4349            minimal_assetmap(
4350                r#"<Asset>
4351                  <Id>urn:uuid:00000000-0000-0000-0000-000000000002</Id>
4352                  <PackingList>true</PackingList>
4353                  <ChunkList><Chunk><Path>PKL.xml</Path><VolumeIndex>1</VolumeIndex></Chunk></ChunkList>
4354                </Asset>"#,
4355            ),
4356        );
4357        // Deliberately malformed PKL
4358        files.insert("PKL.xml".to_string(), "<not-a-pkl/>".to_string());
4359
4360        let package = Imferno::parse(files).expect("parse should succeed even with bad PKL");
4361        assert!(
4362            package
4363                .parse_issues
4364                .iter()
4365                .any(|i| i.code == codes::ImfernoCode::PklParseError.code()),
4366            "expected PklParseError issue, got: {:?}",
4367            package.parse_issues,
4368        );
4369    }
4370
4371    #[test]
4372    fn unparseable_xml_asset_produces_parse_issue() {
4373        let mut files = HashMap::new();
4374        files.insert(
4375            "ASSETMAP.xml".to_string(),
4376            minimal_assetmap(
4377                r#"<Asset>
4378                  <Id>urn:uuid:00000000-0000-0000-0000-000000000003</Id>
4379                  <ChunkList><Chunk><Path>MYSTERY.xml</Path><VolumeIndex>1</VolumeIndex></Chunk></ChunkList>
4380                </Asset>"#,
4381            ),
4382        );
4383        files.insert("MYSTERY.xml".to_string(), "<SomethingElse/>".to_string());
4384
4385        let package = Imferno::parse(files).expect("parse should succeed");
4386        assert!(
4387            package
4388                .parse_issues
4389                .iter()
4390                .any(|i| i.code == codes::ImfernoCode::XmlAssetParseError.code()),
4391            "expected XmlAssetParseError issue, got: {:?}",
4392            package.parse_issues,
4393        );
4394    }
4395
4396    #[test]
4397    fn path_traversal_produces_parse_issue() {
4398        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
4399        let files = read_dir(test_path).unwrap();
4400        let package = Imferno::parse(files).expect("parse should succeed");
4401
4402        // Simulate what would happen with a traversal path by checking
4403        // that our existing valid package has NO traversal issues
4404        assert!(
4405            !package
4406                .parse_issues
4407                .iter()
4408                .any(|i| i.code == codes::ImfernoCode::PathTraversal.code()),
4409            "valid package should have no path traversal issues",
4410        );
4411    }
4412
4413    #[allow(deprecated)]
4414    #[test]
4415    fn sequence_language_extracted_from_descriptors() {
4416        let test_path = test_data("MERIDIAN_Netflix_Photon_161006");
4417        let files = read_dir(test_path).unwrap();
4418        let package = Imferno::parse(files).unwrap();
4419        let report =
4420            crate::package::report::build_report(&package, &ValidationOptions::default(), None)
4421                .unwrap();
4422        for cpl in &report.cpls {
4423            let audio_seqs: Vec<_> = cpl
4424                .sequences
4425                .iter()
4426                .filter(|s| s.r#type == "MainAudio")
4427                .collect();
4428            assert!(
4429                !audio_seqs.is_empty(),
4430                "should have at least one audio sequence"
4431            );
4432            for seq in &audio_seqs {
4433                eprintln!("Audio seq {} language: {:?}", seq.track_id, seq.language);
4434                assert_eq!(
4435                    seq.language.as_deref(),
4436                    Some("en"),
4437                    "MERIDIAN audio should have language 'en', got {:?}",
4438                    seq.language,
4439                );
4440            }
4441        }
4442    }
4443}