// colconv 0.1.0 - Docs.rs

//! 10-bit planar YUV 4:2:0 dispatchers — 4 variants.

#[cfg(any(
  target_arch = "aarch64",
  target_arch = "x86_64",
  target_arch = "wasm32"
))]
use crate::row::arch;
#[cfg(target_arch = "aarch64")]
use crate::row::neon_available;
#[cfg(target_arch = "wasm32")]
use crate::row::simd128_available;
#[cfg(target_arch = "x86_64")]
use crate::row::{avx2_available, avx512_available, sse41_available};
use crate::{
  ColorMatrix,
  row::{rgb_row_bytes, rgb_row_elems, rgba_row_bytes, rgba_row_elems, scalar},
};

/// Dispatches one **10-bit** YUV 4:2:0 row to the packed **8-bit** RGB
/// kernels, with the input byte order chosen at run time.
///
/// Every input element is a `u16` holding a 10-bit sample in its low
/// bits. The output is `3 * width` bytes of packed `R, G, B`, clamped
/// to `[0, 255]`; [`yuv420p10_to_rgb_u16_row`] is the native-depth
/// counterpart.
///
/// `big_endian = true` selects the kernels' `BE = true` instantiation
/// (intended for planes whose samples are stored big-endian), `false`
/// the little-endian one. With `use_simd = true` the first available
/// backend for the target is used (NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32); otherwise, or when
/// none is available, the scalar reference path runs.
///
/// The full semantic specification lives in
/// `scalar::yuv_420p_n_to_rgb_row`.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` elements,
/// if `u_half` or `v_half` holds fewer than `width / 2` elements, or
/// if `rgb_out` is shorter than `rgb_row_bytes(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_row_endian(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgb_out: &mut [u8],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
  big_endian: bool,
) {
  // Per-byte-order worker: `BE` is forwarded verbatim as the kernels'
  // second const parameter, so each byte order gets its own
  // monomorphised copy of the backend selection below.
  #[cfg_attr(not(tarpaulin), inline(always))]
  #[allow(clippy::too_many_arguments)]
  fn convert<const BE: bool>(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
  ) {
    if use_simd {
      cfg_select! {
        target_arch = "aarch64" => {
          if neon_available() {
            // SAFETY: NEON was verified on this CPU just above; slice
            // bounds and width parity were asserted by the enclosing
            // dispatcher before this worker was called.
            unsafe {
              arch::neon::yuv_420p_n_to_rgb_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "x86_64" => {
          if avx512_available() {
            // SAFETY: AVX-512BW availability was verified just above.
            unsafe {
              arch::x86_avx512::yuv_420p_n_to_rgb_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
          if avx2_available() {
            // SAFETY: AVX2 availability was verified just above.
            unsafe {
              arch::x86_avx2::yuv_420p_n_to_rgb_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
          if sse41_available() {
            // SAFETY: SSE4.1 availability was verified just above.
            unsafe {
              arch::x86_sse41::yuv_420p_n_to_rgb_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "wasm32" => {
          if simd128_available() {
            // SAFETY: simd128 support is verified at compile time.
            unsafe {
              arch::wasm_simd128::yuv_420p_n_to_rgb_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        _ => {}
      }
    }

    // Scalar reference path: SIMD disabled, or no backend available.
    scalar::yuv_420p_n_to_rgb_row::<10, BE>(
      y, u_half, v_half, rgb_out, width, matrix, full_range,
    );
  }

  // Validate the row geometry once, before any (unsafe) kernel runs.
  assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
  let out_min = rgb_row_bytes(width);
  assert!(y.len() >= width, "y row too short");
  assert!(u_half.len() >= width / 2, "u_half row too short");
  assert!(v_half.len() >= width / 2, "v_half row too short");
  assert!(rgb_out.len() >= out_min, "rgb_out row too short");

  if big_endian {
    convert::<true>(
      y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd,
    );
  } else {
    convert::<false>(
      y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd,
    );
  }
}

/// Little-endian entry point for the 8-bit RGB conversion: forwards
/// every argument to [`yuv420p10_to_rgb_row_endian`] with
/// `big_endian = false`.
///
/// Keeps the signature this function had before endian awareness was
/// added, so existing little-endian callers compile unchanged.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_row(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgb_out: &mut [u8],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
) {
  // This wrapper always requests the little-endian kernels.
  const BIG_ENDIAN: bool = false;
  yuv420p10_to_rgb_row_endian(
    y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd, BIG_ENDIAN,
  )
}

/// Dispatches one **10-bit** YUV 4:2:0 row to the **native-depth**
/// packed RGB `u16` kernels, with the input byte order chosen at run
/// time.
///
/// Output samples sit in the **low** 10 bits of each `u16` (matching
/// FFmpeg's `yuv420p10le` convention), which suits lossless downstream
/// HDR processing when the consumer expects low-bit-packed samples.
/// The row is written as packed `R, G, B` triples: `3 * width` `u16`
/// elements, each in `[0, 1023]` with the upper 6 bits zero.
///
/// This is **not** the FFmpeg `p010` layout: `p010` keeps samples in
/// the **high** 10 bits of each `u16` (`sample << 6`), so callers
/// feeding a p010 consumer must shift left by 6 before handing off.
///
/// `big_endian = true` selects the kernels' `BE = true` instantiation
/// (intended for planes whose samples are stored big-endian), `false`
/// the little-endian one. With `use_simd = true` the first available
/// backend for the target is used (NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32); otherwise, or when
/// none is available, the scalar reference path runs.
///
/// The full semantic specification lives in
/// `scalar::yuv_420p_n_to_rgb_u16_row`.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` elements,
/// if `u_half` or `v_half` holds fewer than `width / 2` elements, or
/// if `rgb_out` is shorter than `rgb_row_elems(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_u16_row_endian(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgb_out: &mut [u16],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
  big_endian: bool,
) {
  // Per-byte-order worker: `BE` is forwarded verbatim as the kernels'
  // second const parameter, so each byte order gets its own
  // monomorphised copy of the backend selection below.
  #[cfg_attr(not(tarpaulin), inline(always))]
  #[allow(clippy::too_many_arguments)]
  fn convert<const BE: bool>(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgb_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
  ) {
    if use_simd {
      cfg_select! {
        target_arch = "aarch64" => {
          if neon_available() {
            // SAFETY: NEON availability was verified just above; slice
            // bounds and width parity were asserted by the enclosing
            // dispatcher before this worker was called.
            unsafe {
              arch::neon::yuv_420p_n_to_rgb_u16_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "x86_64" => {
          if avx512_available() {
            // SAFETY: AVX-512BW availability was verified just above.
            unsafe {
              arch::x86_avx512::yuv_420p_n_to_rgb_u16_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
          if avx2_available() {
            // SAFETY: AVX2 availability was verified just above.
            unsafe {
              arch::x86_avx2::yuv_420p_n_to_rgb_u16_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
          if sse41_available() {
            // SAFETY: SSE4.1 availability was verified just above.
            unsafe {
              arch::x86_sse41::yuv_420p_n_to_rgb_u16_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "wasm32" => {
          if simd128_available() {
            // SAFETY: simd128 support is verified at compile time.
            unsafe {
              arch::wasm_simd128::yuv_420p_n_to_rgb_u16_row::<10, BE>(
                y, u_half, v_half, rgb_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        _ => {}
      }
    }

    // Scalar reference path: SIMD disabled, or no backend available.
    scalar::yuv_420p_n_to_rgb_u16_row::<10, BE>(
      y, u_half, v_half, rgb_out, width, matrix, full_range,
    );
  }

  // Validate the row geometry once, before any (unsafe) kernel runs.
  assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
  let out_min = rgb_row_elems(width);
  assert!(y.len() >= width, "y row too short");
  assert!(u_half.len() >= width / 2, "u_half row too short");
  assert!(v_half.len() >= width / 2, "v_half row too short");
  assert!(rgb_out.len() >= out_min, "rgb_out row too short");

  if big_endian {
    convert::<true>(
      y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd,
    );
  } else {
    convert::<false>(
      y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd,
    );
  }
}

/// Little-endian entry point for the native-depth RGB conversion:
/// forwards every argument to [`yuv420p10_to_rgb_u16_row_endian`] with
/// `big_endian = false`.
///
/// Keeps the signature this function had before endian awareness was
/// added, so existing little-endian callers compile unchanged.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgb_u16_row(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgb_out: &mut [u16],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
) {
  // This wrapper always requests the little-endian kernels.
  const BIG_ENDIAN: bool = false;
  yuv420p10_to_rgb_u16_row_endian(
    y, u_half, v_half, rgb_out, width, matrix, full_range, use_simd, BIG_ENDIAN,
  )
}

/// Dispatches one **10-bit** YUV 4:2:0 row to the packed **8-bit**
/// **RGBA** kernels (`R, G, B, 0xFF`), with the input byte order
/// chosen at run time. Alpha is always opaque because the source has
/// no alpha plane.
///
/// The numerical contract matches [`yuv420p10_to_rgb_row`]; only the
/// per-pixel stride (4 instead of 3) and the constant alpha byte
/// differ. `scalar::yuv_420p_n_to_rgba_row` is the reference.
///
/// `big_endian = true` selects the kernels' `BE = true` instantiation
/// (intended for planes whose samples are stored big-endian), `false`
/// the little-endian one. With `use_simd = true` the first available
/// backend for the target is used (NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32); otherwise, or when
/// none is available, the scalar reference path runs.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` elements,
/// if `u_half` or `v_half` holds fewer than `width / 2` elements, or
/// if `rgba_out` is shorter than `rgba_row_bytes(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_row_endian(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgba_out: &mut [u8],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
  big_endian: bool,
) {
  // Per-byte-order worker: `BE` is forwarded verbatim as the kernels'
  // second const parameter, so each byte order gets its own
  // monomorphised copy of the backend selection below.
  #[cfg_attr(not(tarpaulin), inline(always))]
  #[allow(clippy::too_many_arguments)]
  fn convert<const BE: bool>(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
  ) {
    if use_simd {
      cfg_select! {
        target_arch = "aarch64" => {
          if neon_available() {
            // SAFETY: NEON availability was verified just above; slice
            // bounds and width parity were asserted by the enclosing
            // dispatcher before this worker was called.
            unsafe {
              arch::neon::yuv_420p_n_to_rgba_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "x86_64" => {
          if avx512_available() {
            // SAFETY: AVX-512BW availability was verified just above.
            unsafe {
              arch::x86_avx512::yuv_420p_n_to_rgba_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
          if avx2_available() {
            // SAFETY: AVX2 availability was verified just above.
            unsafe {
              arch::x86_avx2::yuv_420p_n_to_rgba_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
          if sse41_available() {
            // SAFETY: SSE4.1 availability was verified just above.
            unsafe {
              arch::x86_sse41::yuv_420p_n_to_rgba_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "wasm32" => {
          if simd128_available() {
            // SAFETY: simd128 support is verified at compile time.
            unsafe {
              arch::wasm_simd128::yuv_420p_n_to_rgba_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        _ => {}
      }
    }

    // Scalar reference path: SIMD disabled, or no backend available.
    scalar::yuv_420p_n_to_rgba_row::<10, BE>(
      y, u_half, v_half, rgba_out, width, matrix, full_range,
    );
  }

  // Validate the row geometry once, before any (unsafe) kernel runs.
  assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
  let out_min = rgba_row_bytes(width);
  assert!(y.len() >= width, "y row too short");
  assert!(u_half.len() >= width / 2, "u_half row too short");
  assert!(v_half.len() >= width / 2, "v_half row too short");
  assert!(rgba_out.len() >= out_min, "rgba_out row too short");

  if big_endian {
    convert::<true>(
      y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd,
    );
  } else {
    convert::<false>(
      y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd,
    );
  }
}

/// Little-endian entry point for the 8-bit RGBA conversion: forwards
/// every argument to [`yuv420p10_to_rgba_row_endian`] with
/// `big_endian = false`.
///
/// Keeps the signature this function had before endian awareness was
/// added, so existing little-endian callers compile unchanged.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_row(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgba_out: &mut [u8],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
) {
  // This wrapper always requests the little-endian kernels.
  const BIG_ENDIAN: bool = false;
  yuv420p10_to_rgba_row_endian(
    y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd, BIG_ENDIAN,
  )
}

/// Dispatches one **10-bit** YUV 4:2:0 row to the **native-depth
/// `u16`** packed **RGBA** kernels, with the input byte order chosen
/// at run time.
///
/// Output is low-bit-packed: every element lies in
/// `[0, (1 << 10) - 1]` in the low bits of its `u16`, and the alpha
/// element is `(1 << 10) - 1` (the opaque maximum at the input bit
/// depth). `scalar::yuv_420p_n_to_rgba_u16_row` is the reference.
///
/// `big_endian = true` selects the kernels' `BE = true` instantiation
/// (intended for planes whose samples are stored big-endian), `false`
/// the little-endian one. With `use_simd = true` the first available
/// backend for the target is used (NEON on aarch64; AVX-512, then
/// AVX2, then SSE4.1 on x86_64; simd128 on wasm32); otherwise, or when
/// none is available, the scalar reference path runs.
///
/// # Panics
///
/// Panics if `width` is odd, if `y` holds fewer than `width` elements,
/// if `u_half` or `v_half` holds fewer than `width / 2` elements, or
/// if `rgba_out` is shorter than `rgba_row_elems(width)`.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_u16_row_endian(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgba_out: &mut [u16],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
  big_endian: bool,
) {
  // Per-byte-order worker: `BE` is forwarded verbatim as the kernels'
  // second const parameter, so each byte order gets its own
  // monomorphised copy of the backend selection below.
  #[cfg_attr(not(tarpaulin), inline(always))]
  #[allow(clippy::too_many_arguments)]
  fn convert<const BE: bool>(
    y: &[u16],
    u_half: &[u16],
    v_half: &[u16],
    rgba_out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
  ) {
    if use_simd {
      cfg_select! {
        target_arch = "aarch64" => {
          if neon_available() {
            // SAFETY: NEON availability was verified just above; slice
            // bounds and width parity were asserted by the enclosing
            // dispatcher before this worker was called.
            unsafe {
              arch::neon::yuv_420p_n_to_rgba_u16_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "x86_64" => {
          if avx512_available() {
            // SAFETY: AVX-512BW availability was verified just above.
            unsafe {
              arch::x86_avx512::yuv_420p_n_to_rgba_u16_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
          if avx2_available() {
            // SAFETY: AVX2 availability was verified just above.
            unsafe {
              arch::x86_avx2::yuv_420p_n_to_rgba_u16_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
          if sse41_available() {
            // SAFETY: SSE4.1 availability was verified just above.
            unsafe {
              arch::x86_sse41::yuv_420p_n_to_rgba_u16_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        target_arch = "wasm32" => {
          if simd128_available() {
            // SAFETY: simd128 support is verified at compile time.
            unsafe {
              arch::wasm_simd128::yuv_420p_n_to_rgba_u16_row::<10, BE>(
                y, u_half, v_half, rgba_out, width, matrix, full_range,
              );
            }
            return;
          }
        },
        _ => {}
      }
    }

    // Scalar reference path: SIMD disabled, or no backend available.
    scalar::yuv_420p_n_to_rgba_u16_row::<10, BE>(
      y, u_half, v_half, rgba_out, width, matrix, full_range,
    );
  }

  // Validate the row geometry once, before any (unsafe) kernel runs.
  assert_eq!(width & 1, 0, "YUV 4:2:0 requires even width");
  let out_min = rgba_row_elems(width);
  assert!(y.len() >= width, "y row too short");
  assert!(u_half.len() >= width / 2, "u_half row too short");
  assert!(v_half.len() >= width / 2, "v_half row too short");
  assert!(rgba_out.len() >= out_min, "rgba_out row too short");

  if big_endian {
    convert::<true>(
      y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd,
    );
  } else {
    convert::<false>(
      y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd,
    );
  }
}

/// Little-endian entry point for the native-depth RGBA conversion:
/// forwards every argument to [`yuv420p10_to_rgba_u16_row_endian`]
/// with `big_endian = false`.
///
/// Keeps the signature this function had before endian awareness was
/// added, so existing little-endian callers compile unchanged.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub fn yuv420p10_to_rgba_u16_row(
  y: &[u16],
  u_half: &[u16],
  v_half: &[u16],
  rgba_out: &mut [u16],
  width: usize,
  matrix: ColorMatrix,
  full_range: bool,
  use_simd: bool,
) {
  // This wrapper always requests the little-endian kernels.
  const BIG_ENDIAN: bool = false;
  yuv420p10_to_rgba_u16_row_endian(
    y, u_half, v_half, rgba_out, width, matrix, full_range, use_simd, BIG_ENDIAN,
  )
}

#[cfg(all(test, feature = "std"))]
// Host-independent BE parity tests — fixtures built byte-wise via
// `to_le_bytes` / `to_be_bytes` and reinterpreted with `from_ne_bytes`,
// so the LE and BE variants exercise the dispatcher's `BE=false` and
// `BE=true` paths regardless of host endianness.
mod be_parity_tests {
  use super::*;

  /// Build LE/BE host-native u16 buffers from a slice of intended 10-bit
  /// samples (values must already be in the correct low-bit-packed form
  /// — `[0, 1023]`). Returns `(le, be)` where each contains `u16`
  /// elements that, when serialized via `to_ne_bytes`, reproduce the
  /// LE/BE wire bytes.
  fn split_le_be(intended: &[u16]) -> (std::vec::Vec<u16>, std::vec::Vec<u16>) {
    let le_bytes: std::vec::Vec<u8> = intended.iter().flat_map(|v| v.to_le_bytes()).collect();
    let be_bytes: std::vec::Vec<u8> = intended.iter().flat_map(|v| v.to_be_bytes()).collect();
    let le: std::vec::Vec<u16> = le_bytes
      .chunks_exact(2)
      .map(|b| u16::from_ne_bytes([b[0], b[1]]))
      .collect();
    let be: std::vec::Vec<u16> = be_bytes
      .chunks_exact(2)
      .map(|b| u16::from_ne_bytes([b[0], b[1]]))
      .collect();
    (le, be)
  }

  /// Deterministic pseudo-random luma row: `width` samples derived from
  /// `seed`, each masked to 10 bits (`& 0x3FF`).
  fn pseudo_y(width: usize, seed: u32) -> std::vec::Vec<u16> {
    (0..width)
      .map(|i| ((seed.wrapping_mul(i as u32 + 1).wrapping_add(0x55)) & 0x3FF) as u16)
      .collect()
  }

  /// Deterministic pseudo-random chroma row: `half` samples derived from
  /// `seed`, each masked to 10 bits (`& 0x3FF`).
  fn pseudo_uv(half: usize, seed: u32) -> std::vec::Vec<u16> {
    (0..half)
      .map(|i| ((seed.wrapping_mul(i as u32 + 7).wrapping_add(0x123)) & 0x3FF) as u16)
      .collect()
  }

  /// For every output format, the `big_endian = false` call on LE-encoded
  /// input and the `big_endian = true` call on BE-encoded input must
  /// produce identical rows, on both the scalar and the SIMD-enabled path.
  #[test]
  #[cfg_attr(
    miri,
    ignore = "SIMD-dispatched row kernels use intrinsics unsupported by Miri"
  )]
  fn yuv420p10_dispatch_be_le_parity_simd_and_scalar() {
    for w in [8usize, 16, 24] {
      let half = w / 2;
      let y_int = pseudo_y(w, 0xA17F);
      let u_int = pseudo_uv(half, 0xC0DE);
      let v_int = pseudo_uv(half, 0xBEEF);
      let (y_le, y_be) = split_le_be(&y_int);
      let (u_le, u_be) = split_le_be(&u_int);
      let (v_le, v_be) = split_le_be(&v_int);

      for &use_simd in &[false, true] {
        // u8 RGB
        let mut out_le = std::vec![0u8; w * 3];
        let mut out_be = std::vec![0u8; w * 3];
        yuv420p10_to_rgb_row_endian(
          &y_le,
          &u_le,
          &v_le,
          &mut out_le,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          false,
        );
        yuv420p10_to_rgb_row_endian(
          &y_be,
          &u_be,
          &v_be,
          &mut out_be,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          true,
        );
        assert_eq!(
          out_le, out_be,
          "yuv420p10 rgb BE/LE parity (w={w}, simd={use_simd})"
        );

        // u16 RGB
        let mut out_le16 = std::vec![0u16; w * 3];
        let mut out_be16 = std::vec![0u16; w * 3];
        yuv420p10_to_rgb_u16_row_endian(
          &y_le,
          &u_le,
          &v_le,
          &mut out_le16,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          false,
        );
        yuv420p10_to_rgb_u16_row_endian(
          &y_be,
          &u_be,
          &v_be,
          &mut out_be16,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          true,
        );
        assert_eq!(
          out_le16, out_be16,
          "yuv420p10 rgb_u16 BE/LE parity (w={w}, simd={use_simd})"
        );

        // u8 RGBA
        let mut out_le4 = std::vec![0u8; w * 4];
        let mut out_be4 = std::vec![0u8; w * 4];
        yuv420p10_to_rgba_row_endian(
          &y_le,
          &u_le,
          &v_le,
          &mut out_le4,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          false,
        );
        yuv420p10_to_rgba_row_endian(
          &y_be,
          &u_be,
          &v_be,
          &mut out_be4,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          true,
        );
        assert_eq!(
          out_le4, out_be4,
          "yuv420p10 rgba BE/LE parity (w={w}, simd={use_simd})"
        );

        // u16 RGBA
        let mut out_le4u = std::vec![0u16; w * 4];
        let mut out_be4u = std::vec![0u16; w * 4];
        yuv420p10_to_rgba_u16_row_endian(
          &y_le,
          &u_le,
          &v_le,
          &mut out_le4u,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          false,
        );
        yuv420p10_to_rgba_u16_row_endian(
          &y_be,
          &u_be,
          &v_be,
          &mut out_be4u,
          w,
          ColorMatrix::Bt709,
          false,
          use_simd,
          true,
        );
        assert_eq!(
          out_le4u, out_be4u,
          "yuv420p10 rgba_u16 BE/LE parity (w={w}, simd={use_simd})"
        );
      }
    }
  }
}