#[allow(unused_imports)]
use crate::internals::ProcessedOffset;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::neon::neon_yuv_p16_to_rgba_alpha_row;
use crate::numerics::to_ne;
use crate::yuv_error::check_rgba_destination;
use crate::yuv_support::{
get_yuv_range, search_inverse_transform, YuvBytesPacking, YuvChromaSubsampling, YuvEndianness,
YuvRange, YuvSourceChannels, YuvStandardMatrix,
};
use crate::{YuvError, YuvPlanarImageWithAlpha};
#[cfg(feature = "rayon")]
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
#[cfg(feature = "rayon")]
use rayon::prelude::{ParallelSlice, ParallelSliceMut};
fn yuv_p16_to_image_alpha_ant<
const DESTINATION_CHANNELS: u8,
const SAMPLING: u8,
const ENDIANNESS: u8,
const BYTES_POSITION: u8,
const BIT_DEPTH: usize,
>(
image: &YuvPlanarImageWithAlpha<u16>,
rgba: &mut [u8],
rgba_stride: u32,
range: YuvRange,
matrix: YuvStandardMatrix,
) -> Result<(), YuvError> {
let dst_chans: YuvSourceChannels = DESTINATION_CHANNELS.into();
let channels = dst_chans.get_channels_count();
assert!(
dst_chans != YuvSourceChannels::Rgb && dst_chans != YuvSourceChannels::Bgr,
"Cannot call YUV p16 to Rgb8 with alpha without real alpha"
);
assert!(
BIT_DEPTH == 10 || BIT_DEPTH == 12,
"YUV16 -> RGB8 implemented only 10 and 12 bit depth"
);
let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
image.check_constraints(chroma_subsampling)?;
check_rgba_destination(rgba, rgba_stride, image.width, image.height, channels)?;
let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
let kr_kb = matrix.get_kr_kb();
const PRECISION: i32 = 13;
let i_transform = search_inverse_transform(
PRECISION,
BIT_DEPTH as u32,
range,
matrix,
chroma_range,
kr_kb,
);
let cr_coef = i_transform.cr_coef;
let cb_coef = i_transform.cb_coef;
let y_coef = i_transform.y_coef;
let g_coef_1 = i_transform.g_coeff_1;
let g_coef_2 = i_transform.g_coeff_2;
let bias_y = chroma_range.bias_y as i32;
let bias_uv = chroma_range.bias_uv as i32;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
let use_sse = std::arch::is_x86_feature_detected!("sse4.1");
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
let use_avx = std::arch::is_x86_feature_detected!("avx2");
let msb_shift = (16 - BIT_DEPTH) as i32;
let store_shift = BIT_DEPTH - 8;
#[inline(always)]
fn qrshr<const BIT_DEPTH: usize>(val: i32) -> i32 {
let total_shift = PRECISION + (BIT_DEPTH as i32 - 8);
let rounding: i32 = 1 << (total_shift - 1);
let max_value: i32 = (1 << BIT_DEPTH) - 1;
((val + rounding) >> total_shift).min(max_value).max(0)
}
let process_wide_row = |_y_plane: &[u16],
_u_plane: &[u16],
_v_plane: &[u16],
_a_plane: &[u16],
_rgba: &mut [u8]| {
let mut _cx = 0usize;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
let mut _v_offset = ProcessedOffset { cx: 0, ux: 0 };
{
#[cfg(feature = "avx")]
if use_avx {
use crate::avx2::avx_yuv_p16_to_rgba8_alpha_row;
unsafe {
let offset = avx_yuv_p16_to_rgba8_alpha_row::<
DESTINATION_CHANNELS,
SAMPLING,
ENDIANNESS,
BYTES_POSITION,
BIT_DEPTH,
PRECISION,
>(
_y_plane,
_u_plane,
_v_plane,
_a_plane,
_rgba,
image.width,
&chroma_range,
&i_transform,
_v_offset.cx,
_v_offset.ux,
);
_v_offset = offset;
}
}
#[cfg(feature = "sse")]
if use_sse {
use crate::sse::sse_yuv_p16_to_rgba8_alpha_row;
unsafe {
let offset = sse_yuv_p16_to_rgba8_alpha_row::<
DESTINATION_CHANNELS,
SAMPLING,
ENDIANNESS,
BYTES_POSITION,
BIT_DEPTH,
PRECISION,
>(
_y_plane,
_u_plane,
_v_plane,
_a_plane,
_rgba,
image.width,
&chroma_range,
&i_transform,
_v_offset.cx,
_v_offset.ux,
);
_v_offset = offset;
}
}
}
_cx = _v_offset.cx;
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
unsafe {
let offset = neon_yuv_p16_to_rgba_alpha_row::<
DESTINATION_CHANNELS,
SAMPLING,
ENDIANNESS,
BYTES_POSITION,
BIT_DEPTH,
PRECISION,
>(
_y_plane,
_u_plane,
_v_plane,
_a_plane,
_rgba,
image.width,
&chroma_range,
&i_transform,
0,
0,
);
_cx = offset.cx;
}
}
_cx
};
let process_halved_chroma_row = |y_plane: &[u16],
u_plane: &[u16],
v_plane: &[u16],
a_plane: &[u16],
rgba: &mut [u8]| {
let cx = process_wide_row(y_plane, u_plane, v_plane, a_plane, rgba);
for ((((rgba, y_src), &u_src), &v_src), a_src) in rgba
.chunks_exact_mut(channels * 2)
.zip(y_plane.chunks_exact(2))
.zip(u_plane.iter())
.zip(v_plane.iter())
.zip(a_plane.chunks_exact(2))
.skip(cx / 2)
{
let y_value0 =
(to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32 - bias_y) * y_coef;
let cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(u_src, msb_shift) as i32 - bias_uv;
let cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(v_src, msb_shift) as i32 - bias_uv;
let r0 = qrshr::<BIT_DEPTH>(y_value0 + cr_coef * cr_value);
let b0 = qrshr::<BIT_DEPTH>(y_value0 + cb_coef * cb_value);
let g0 = qrshr::<BIT_DEPTH>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);
let rgba0 = &mut rgba[..channels];
rgba0[dst_chans.get_r_channel_offset()] = r0 as u8;
rgba0[dst_chans.get_g_channel_offset()] = g0 as u8;
rgba0[dst_chans.get_b_channel_offset()] = b0 as u8;
rgba0[dst_chans.get_a_channel_offset()] = (a_src[0] >> store_shift).min(255) as u8;
let y_value1 =
(to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32 - bias_y) * y_coef;
let r1 = qrshr::<BIT_DEPTH>(y_value1 + cr_coef * cr_value);
let b1 = qrshr::<BIT_DEPTH>(y_value1 + cb_coef * cb_value);
let g1 = qrshr::<BIT_DEPTH>(y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value);
let rgba1 = &mut rgba[channels..channels * 2];
rgba1[dst_chans.get_r_channel_offset()] = r1 as u8;
rgba1[dst_chans.get_g_channel_offset()] = g1 as u8;
rgba1[dst_chans.get_b_channel_offset()] = b1 as u8;
rgba1[dst_chans.get_a_channel_offset()] = (a_src[1] >> store_shift).min(255) as u8;
}
if image.width & 1 != 0 {
let y_value0 = (to_ne::<ENDIANNESS, BYTES_POSITION>(*y_plane.last().unwrap(), msb_shift)
as i32
- bias_y)
* y_coef;
let cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(*u_plane.last().unwrap(), msb_shift)
as i32
- bias_uv;
let cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(*v_plane.last().unwrap(), msb_shift)
as i32
- bias_uv;
let a_value = *a_plane.last().unwrap();
let rgba = rgba.chunks_exact_mut(channels).last().unwrap();
let rgba0 = &mut rgba[..channels];
let r0 = qrshr::<BIT_DEPTH>(y_value0 + cr_coef * cr_value);
let b0 = qrshr::<BIT_DEPTH>(y_value0 + cb_coef * cb_value);
let g0 = qrshr::<BIT_DEPTH>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);
rgba0[dst_chans.get_r_channel_offset()] = r0 as u8;
rgba0[dst_chans.get_g_channel_offset()] = g0 as u8;
rgba0[dst_chans.get_b_channel_offset()] = b0 as u8;
rgba0[dst_chans.get_a_channel_offset()] = (a_value >> store_shift).min(255) as u8;
}
};
if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
let iter;
#[cfg(feature = "rayon")]
{
iter = rgba
.par_chunks_exact_mut(rgba_stride as usize)
.zip(image.y_plane.par_chunks_exact(image.y_stride as usize))
.zip(image.a_plane.par_chunks_exact(image.a_stride as usize))
.zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
.zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
}
#[cfg(not(feature = "rayon"))]
{
iter = rgba
.chunks_exact_mut(rgba_stride as usize)
.zip(image.y_plane.chunks_exact(image.y_stride as usize))
.zip(image.a_plane.chunks_exact(image.a_stride as usize))
.zip(image.u_plane.chunks_exact(image.u_stride as usize))
.zip(image.v_plane.chunks_exact(image.v_stride as usize));
}
iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
let y_plane = &y_plane[..image.width as usize];
let cx = process_wide_row(y_plane, u_plane, v_plane, a_plane, rgba);
for ((((rgba, &y_src), &u_src), &v_src), &a_src) in rgba
.chunks_exact_mut(channels)
.zip(y_plane.iter())
.zip(u_plane.iter())
.zip(v_plane.iter())
.zip(a_plane.iter())
.skip(cx)
{
let y_value = (to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32
- bias_y)
* y_coef;
let cb_value =
to_ne::<ENDIANNESS, BYTES_POSITION>(u_src, msb_shift) as i32 - bias_uv;
let cr_value =
to_ne::<ENDIANNESS, BYTES_POSITION>(v_src, msb_shift) as i32 - bias_uv;
let r = qrshr::<BIT_DEPTH>(y_value + cr_coef * cr_value);
let b = qrshr::<BIT_DEPTH>(y_value + cb_coef * cb_value);
let g = qrshr::<BIT_DEPTH>(y_value - g_coef_1 * cr_value - g_coef_2 * cb_value);
rgba[dst_chans.get_r_channel_offset()] = r as u8;
rgba[dst_chans.get_g_channel_offset()] = g as u8;
rgba[dst_chans.get_b_channel_offset()] = b as u8;
rgba[dst_chans.get_a_channel_offset()] = (a_src >> store_shift).min(255) as u8;
}
});
} else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
let iter;
#[cfg(feature = "rayon")]
{
iter = rgba
.par_chunks_exact_mut(rgba_stride as usize)
.zip(image.y_plane.par_chunks_exact(image.y_stride as usize))
.zip(image.a_plane.par_chunks_exact(image.a_stride as usize))
.zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
.zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
}
#[cfg(not(feature = "rayon"))]
{
iter = rgba
.chunks_exact_mut(rgba_stride as usize)
.zip(image.y_plane.chunks_exact(image.y_stride as usize))
.zip(image.a_plane.chunks_exact(image.a_stride as usize))
.zip(image.u_plane.chunks_exact(image.u_stride as usize))
.zip(image.v_plane.chunks_exact(image.v_stride as usize));
}
iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
process_halved_chroma_row(y_plane, u_plane, v_plane, a_plane, rgba);
});
} else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
let iter;
#[cfg(feature = "rayon")]
{
iter = rgba
.par_chunks_exact_mut(rgba_stride as usize * 2)
.zip(image.y_plane.par_chunks_exact(image.y_stride as usize * 2))
.zip(image.a_plane.par_chunks_exact(image.a_stride as usize * 2))
.zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
.zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
}
#[cfg(not(feature = "rayon"))]
{
iter = rgba
.chunks_exact_mut(rgba_stride as usize * 2)
.zip(image.y_plane.chunks_exact(image.y_stride as usize * 2))
.zip(image.a_plane.chunks_exact(image.a_stride as usize * 2))
.zip(image.u_plane.chunks_exact(image.u_stride as usize))
.zip(image.v_plane.chunks_exact(image.v_stride as usize));
}
iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
for ((rgba, y_plane), a_plane) in rgba
.chunks_exact_mut(rgba_stride as usize)
.zip(y_plane.chunks_exact(image.y_stride as usize))
.zip(a_plane.chunks_exact(image.a_stride as usize))
{
process_halved_chroma_row(
&y_plane[0..image.width as usize],
&u_plane[0..(image.width as usize).div_ceil(2)],
&v_plane[0..(image.width as usize).div_ceil(2)],
&a_plane[0..image.width as usize],
&mut rgba[0..image.width as usize * channels],
);
}
});
if image.height & 1 != 0 {
let rgba = rgba.chunks_exact_mut(rgba_stride as usize).last().unwrap();
let u_plane = image
.u_plane
.chunks_exact(image.u_stride as usize)
.last()
.unwrap();
let v_plane = image
.v_plane
.chunks_exact(image.v_stride as usize)
.last()
.unwrap();
let a_plane = image
.a_plane
.chunks_exact(image.a_stride as usize)
.last()
.unwrap();
let y_plane = image
.y_plane
.chunks_exact(image.y_stride as usize)
.last()
.unwrap();
process_halved_chroma_row(
&y_plane[..image.width as usize],
&u_plane[..(image.width as usize).div_ceil(2)],
&v_plane[..(image.width as usize).div_ceil(2)],
&a_plane[..image.width as usize],
&mut rgba[..image.width as usize * channels],
);
}
} else {
unreachable!();
}
Ok(())
}
macro_rules! d_cnv {
($method: ident, $px_fmt: expr, $sampling: expr, $endian: expr, $sampling_written: expr, $px_written: expr, $px_written_small: expr, $bit_depth: expr) => {
#[doc = concat!("
Convert ",$sampling_written, " planar format with ", stringify!($bit_depth), " bit pixel format to ", $px_written," 8 bit-depth format with interleaving alpha.
This function takes ", $sampling_written, " planar data with ", stringify!($bit_depth), " bit precision and interleaved provided alpha channel,
and converts it to ", $px_written," format with 8 bit-depth precision per channel.
# Arguments
* `planar_image` - Source ",$sampling_written," planar image.
* `", $px_written_small, "` - A mutable slice to store the converted ", $px_written," 8 bit-depth data.
* `", $px_written_small, "_stride` - The stride (components per row) for ", $px_written," 8 bit-depth data.
* `range` - The YUV range (limited or full).
* `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
* `bit_depth` - Bit depth of source YUV planes, only 10 and 12 is supported.
# Panics
This function panics if the lengths of the planes or the input ", $px_written," data are not valid based
on the specified width, height, and strides, or if invalid YUV range or matrix is provided.")]
pub fn $method(
planar_image_with_alpha: &YuvPlanarImageWithAlpha<u16>,
dst: &mut [u8],
dst_stride: u32,
range: YuvRange,
matrix: YuvStandardMatrix,
) -> Result<(), YuvError> {
yuv_p16_to_image_alpha_ant::<{ $px_fmt as u8 },
{ $sampling as u8 },
{ $endian as u8 },
{ YuvBytesPacking::LeastSignificantBytes as u8 }, $bit_depth>(
planar_image_with_alpha, dst, dst_stride, range, matrix)
}
};
}
d_cnv!(
i010_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv420,
YuvEndianness::LittleEndian,
"I010A",
"RGBA",
"rgba",
10
);
#[cfg(feature = "big_endian")]
d_cnv!(
i010_be_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv420,
YuvEndianness::BigEndian,
"I010ABE",
"RGBA",
"rgba",
10
);
d_cnv!(
i210_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv422,
YuvEndianness::LittleEndian,
"I210A",
"RGBA",
"rgba",
10
);
#[cfg(feature = "big_endian")]
d_cnv!(
i210_alpha_be_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv422,
YuvEndianness::BigEndian,
"I210ABE",
"RGBA",
"rgba",
10
);
d_cnv!(
i410_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv444,
YuvEndianness::LittleEndian,
"I410A",
"RGBA",
"rgba",
10
);
#[cfg(feature = "big_endian")]
d_cnv!(
i410_be_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv444,
YuvEndianness::BigEndian,
"I410ABE",
"RGBA",
"rgba",
10
);
d_cnv!(
i012_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv420,
YuvEndianness::LittleEndian,
"I012A",
"RGBA",
"rgba",
12
);
#[cfg(feature = "big_endian")]
d_cnv!(
i012_be_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv420,
YuvEndianness::BigEndian,
"I012ABE",
"RGBA",
"rgba",
12
);
d_cnv!(
i212_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv422,
YuvEndianness::LittleEndian,
"I212A",
"RGBA",
"rgba",
12
);
#[cfg(feature = "big_endian")]
d_cnv!(
i212_be_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv422,
YuvEndianness::BigEndian,
"I212ABE",
"RGBA",
"rgba",
12
);
d_cnv!(
i412_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv444,
YuvEndianness::LittleEndian,
"I412A",
"RGBA",
"rgba",
12
);
#[cfg(feature = "big_endian")]
d_cnv!(
i412_be_alpha_to_rgba,
YuvSourceChannels::Rgba,
YuvChromaSubsampling::Yuv444,
YuvEndianness::BigEndian,
"I412ABE",
"RGBA",
"rgba",
12
);