/// Multiplies an 8-bit sample by a 16-bit coefficient and discards the low
/// 8 bits of the product.
///
/// The product of a `u8` and a `u16` is below 2^24, so the shifted value is
/// below 2^16 and the final conversion to `i32` can neither truncate nor
/// change sign.
fn mulhi(v: u8, coeff: u16) -> i32 {
    let wide_product = u32::from(coeff) * u32::from(v);
    (wide_product >> 8) as i32
}
/// Removes the 6 fractional bits of a fixed-point value and saturates the
/// result to the `0..=255` range of a colour channel.
///
/// The shift is arithmetic, so negative inputs stay negative and end up as 0.
// The `max`/`min` pair is spelled out on purpose instead of `clamp`; the lint
// allowance below silences clippy's suggestion to change it.
#[allow(clippy::manual_clamp)]
fn clip(v: i32) -> u8 {
    const YUV_FIX2: i32 = 6;
    let whole_part = v >> YUV_FIX2;
    whole_part.max(0).min(255) as u8
}
/// Computes the red channel from luma `y` and chroma `v`.
///
/// Both samples are scaled with `mulhi`, a constant offset is subtracted, and
/// `clip` reduces the fixed-point sum to a saturated 8-bit value.
#[inline(always)]
fn yuv_to_r(y: u8, v: u8) -> u8 {
    let luma_term = mulhi(y, 19077);
    let chroma_term = mulhi(v, 26149);
    clip(luma_term + chroma_term - 14234)
}
/// Computes the green channel from luma `y` and both chroma samples.
///
/// The scaled `u` and `v` contributions are subtracted from the scaled luma,
/// a constant offset is added, and `clip` reduces the fixed-point sum to a
/// saturated 8-bit value.
#[inline(always)]
fn yuv_to_g(y: u8, u: u8, v: u8) -> u8 {
    let luma_term = mulhi(y, 19077);
    let u_term = mulhi(u, 6419);
    let v_term = mulhi(v, 13320);
    clip(luma_term - u_term - v_term + 8708)
}
/// Computes the blue channel from luma `y` and chroma `u`.
///
/// Both samples are scaled with `mulhi`, a constant offset is subtracted, and
/// `clip` reduces the fixed-point sum to a saturated 8-bit value.
#[inline(always)]
fn yuv_to_b(y: u8, u: u8) -> u8 {
    let luma_term = mulhi(y, 19077);
    let chroma_term = mulhi(u, 33050);
    clip(luma_term + chroma_term - 17685)
}
/// Converts planar YUV data with half-resolution chroma into interleaved RGB,
/// interpolating chroma between neighbouring samples ("fancy" upsampling).
///
/// * `buffer` - output pixels, `BPP` bytes each, rows packed at `width * BPP`
///   bytes. It is expected to hold exactly `height` rows.
/// * `y_buffer` - luma plane with a row stride of `buffer_width`; only the
///   first `width` samples of each row are used.
/// * `u_buffer`, `v_buffer` - chroma planes with a row stride of
///   `buffer_width / 2`; only the first `width.div_ceil(2)` samples of each
///   row are used.
///
/// Row layout: the first output row is paired with the first chroma row only.
/// Every following pair of rows is interpolated between two adjacent chroma
/// rows, each row of the pair weighting its nearer chroma row more heavily.
/// If one output row is left over, it is paired with the last chroma row only.
///
/// # Panics
///
/// Panics when a plane is too short for the requested dimensions or when a
/// chunk/window size derived from the arguments is zero.
pub(crate) fn fill_rgb_buffer_fancy<const BPP: usize>(
    buffer: &mut [u8],
    y_buffer: &[u8],
    u_buffer: &[u8],
    v_buffer: &[u8],
    width: usize,
    height: usize,
    buffer_width: usize,
) {
    let out_stride = width * BPP;
    let chroma_stride = buffer_width / 2;
    let chroma_width = width.div_ceil(2);

    // The first row has no chroma row above it, so it is converted with
    // horizontal interpolation only.
    let first_luma = &y_buffer[..width];
    let first_u = &u_buffer[..chroma_width];
    let first_v = &v_buffer[..chroma_width];
    fill_row_fancy_with_1_uv_row::<BPP>(&mut buffer[..out_stride], first_luma, first_u, first_v);

    // All remaining rows are walked two at a time.
    let mut out_row_pairs = buffer[out_stride..].chunks_exact_mut(out_stride * 2);
    // The luma plane may be taller than the image; cutting it at `height` rows
    // is what bounds the zipped iteration below.
    let luma_end = height * buffer_width;
    let mut luma_row_pairs = y_buffer[buffer_width..luma_end].chunks_exact(buffer_width * 2);
    // Overlapping windows: chroma rows (0, 1), then (1, 2), then (2, 3), ...
    let u_row_pairs = u_buffer
        .windows(chroma_stride * 2)
        .step_by(chroma_stride);
    let v_row_pairs = v_buffer
        .windows(chroma_stride * 2)
        .step_by(chroma_stride);

    for (((out_pair, luma_pair), u_pair), v_pair) in (&mut out_row_pairs)
        .zip(&mut luma_row_pairs)
        .zip(u_row_pairs)
        .zip(v_row_pairs)
    {
        let (u_above, u_below) = u_pair.split_at(chroma_stride);
        let (v_above, v_below) = v_pair.split_at(chroma_stride);
        let (out_upper, out_lower) = out_pair.split_at_mut(out_stride);
        let (luma_upper, luma_lower) = luma_pair.split_at(buffer_width);

        // Drop the stride padding so the row helpers see exactly one image row.
        let (u_above, u_below) = (&u_above[..chroma_width], &u_below[..chroma_width]);
        let (v_above, v_below) = (&v_above[..chroma_width], &v_below[..chroma_width]);

        // Upper row of the pair: the chroma row above it dominates.
        fill_row_fancy_with_2_uv_rows::<BPP>(
            out_upper,
            &luma_upper[..width],
            u_above,
            u_below,
            v_above,
            v_below,
        );
        // Lower row of the pair: the chroma row below it dominates.
        fill_row_fancy_with_2_uv_rows::<BPP>(
            out_lower,
            &luma_lower[..width],
            u_below,
            u_above,
            v_below,
            v_above,
        );
    }

    // A leftover row exists when the number of rows after the first is odd;
    // it sits below the last chroma row and uses that row alone.
    let last_out_row = out_row_pairs.into_remainder();
    if last_out_row.is_empty() {
        return;
    }
    let last_luma_row = luma_row_pairs.remainder();
    let chroma_height = height.div_ceil(2);
    let last_chroma_start = (chroma_height - 1) * chroma_stride;
    let last_u_row = &u_buffer[last_chroma_start..];
    let last_v_row = &v_buffer[last_chroma_start..];
    fill_row_fancy_with_1_uv_row::<BPP>(
        last_out_row,
        &last_luma_row[..width],
        &last_u_row[..chroma_width],
        &last_v_row[..chroma_width],
    );
}
/// Converts one luma row to RGB, interpolating chroma from two chroma rows.
///
/// `u_row_1`/`v_row_1` are the dominant chroma rows: their samples take the
/// larger weights in `get_fancy_chroma_value`. `u_row_2`/`v_row_2` are the
/// vertically adjacent rows blended in with the smaller weights.
///
/// Pixel layout along the row:
/// * pixel 0 uses chroma column 0 only;
/// * pixels `(1, 2)`, `(3, 4)`, ... are handled in pairs, pair `k` blending
///   chroma columns `k` and `k + 1` (the left pixel favours column `k`, the
///   right pixel column `k + 1`);
/// * when one pixel is left over at the end, it uses the last chroma column
///   only.
///
/// Only the first three bytes of every `BPP`-byte pixel are written.
fn fill_row_fancy_with_2_uv_rows<const BPP: usize>(
    row_buffer: &mut [u8],
    y_row: &[u8],
    u_row_1: &[u8],
    u_row_2: &[u8],
    v_row_1: &[u8],
    v_row_2: &[u8],
) {
    // Blends a 2x2 chroma neighbourhood. `near` and `far` come from the
    // dominant and the adjacent chroma row; `main` selects the dominant
    // column and `side` the other one.
    let blend = |near: &[u8], far: &[u8], main: usize, side: usize| {
        get_fancy_chroma_value(near[main], near[side], far[main], far[side])
    };

    // Leftmost pixel: there is no chroma column to its left, so column 0
    // stands in for both horizontal neighbours.
    {
        let first_px = &mut row_buffer[0..3];
        let luma = y_row[0];
        let u = blend(u_row_1, u_row_2, 0, 0);
        let v = blend(v_row_1, v_row_2, 0, 0);
        set_pixel(first_px, luma, u, v);
    }

    // Interior pixels, two at a time, each pair sharing one chroma window.
    let mut out_pairs = row_buffer[BPP..].chunks_exact_mut(BPP * 2);
    let mut luma_pairs = y_row[1..].chunks_exact(2);
    let interior = (&mut out_pairs)
        .zip(&mut luma_pairs)
        .zip(u_row_1.windows(2))
        .zip(u_row_2.windows(2))
        .zip(v_row_1.windows(2))
        .zip(v_row_2.windows(2));
    for (((((out, luma), u_near), u_far), v_near), v_far) in interior {
        // Left pixel of the pair: the window's first column dominates.
        set_pixel(
            &mut out[0..3],
            luma[0],
            blend(u_near, u_far, 0, 1),
            blend(v_near, v_far, 0, 1),
        );
        // Right pixel of the pair: the window's second column dominates.
        set_pixel(
            &mut out[BPP..],
            luma[1],
            blend(u_near, u_far, 1, 0),
            blend(v_near, v_far, 1, 0),
        );
    }

    // Rightmost pixel when exactly one luma sample is left over: it has no
    // chroma column to its right, so the last column is used alone.
    let last_out = out_pairs.into_remainder();
    if let [last_luma] = luma_pairs.remainder() {
        let (u_near, u_far) = (*u_row_1.last().unwrap(), *u_row_2.last().unwrap());
        let (v_near, v_far) = (*v_row_1.last().unwrap(), *v_row_2.last().unwrap());
        let last_px = &mut last_out[0..3];
        let u = get_fancy_chroma_value(u_near, u_near, u_far, u_far);
        let v = get_fancy_chroma_value(v_near, v_near, v_far, v_far);
        set_pixel(last_px, *last_luma, u, v);
    }
}
/// Converts one luma row to RGB using a single chroma row, interpolating
/// chroma horizontally only.
///
/// Pixel layout along the row:
/// * pixel 0 takes chroma column 0 unchanged;
/// * pixels `(1, 2)`, `(3, 4)`, ... are handled in pairs, pair `k` blending
///   chroma columns `k` and `k + 1` (the left pixel favours column `k`, the
///   right pixel column `k + 1`);
/// * when one pixel is left over at the end, it takes the last chroma column
///   unchanged.
///
/// Only the first three bytes of every `BPP`-byte pixel are written.
fn fill_row_fancy_with_1_uv_row<const BPP: usize>(
    row_buffer: &mut [u8],
    y_row: &[u8],
    u_row: &[u8],
    v_row: &[u8],
) {
    // With a single chroma row there is no distinct vertical neighbour, so the
    // same two-sample window is fed to the blend as both of its rows.
    let blend = |pair: &[u8], main: usize, side: usize| {
        get_fancy_chroma_value(pair[main], pair[side], pair[main], pair[side])
    };

    // Leftmost pixel: chroma column 0 is used as-is.
    set_pixel(&mut row_buffer[0..3], y_row[0], u_row[0], v_row[0]);

    // Interior pixels, two at a time, each pair sharing one chroma window.
    let mut out_pairs = row_buffer[BPP..].chunks_exact_mut(BPP * 2);
    let mut luma_pairs = y_row[1..].chunks_exact(2);
    let interior = (&mut out_pairs)
        .zip(&mut luma_pairs)
        .zip(u_row.windows(2))
        .zip(v_row.windows(2));
    for (((out, luma), u_pair), v_pair) in interior {
        // Left pixel of the pair: the window's first column dominates.
        set_pixel(
            &mut out[0..3],
            luma[0],
            blend(u_pair, 0, 1),
            blend(v_pair, 0, 1),
        );
        // Right pixel of the pair: the window's second column dominates.
        set_pixel(
            &mut out[BPP..],
            luma[1],
            blend(u_pair, 1, 0),
            blend(v_pair, 1, 0),
        );
    }

    // Rightmost pixel when exactly one luma sample is left over: the last
    // chroma column is used as-is.
    let last_out = out_pairs.into_remainder();
    if let [last_luma] = luma_pairs.remainder() {
        let last_u = *u_row.last().unwrap();
        let last_v = *v_row.last().unwrap();
        set_pixel(last_out, *last_luma, last_u, last_v);
    }
}
/// Blends four chroma samples with the weights 9 : 3 : 3 : 1 and rounds to
/// the nearest integer.
///
/// `main` is the sample with the largest weight, `secondary1` and `secondary2`
/// each get weight 3, and `tertiary` gets weight 1. The weights sum to 16, so
/// the rounded result always fits in a `u8`, and the intermediate sum (at most
/// `16 * 255 + 8`) fits in a `u16`.
#[inline]
fn get_fancy_chroma_value(main: u8, secondary1: u8, secondary2: u8, tertiary: u8) -> u8 {
    // Half of the divisor, added before dividing to round to nearest.
    const ROUNDING: u16 = 8;
    let secondaries = u16::from(secondary1) + u16::from(secondary2);
    let weighted_sum = 9 * u16::from(main) + 3 * secondaries + u16::from(tertiary);
    // Unsigned shift by 4 is the division by the weight total of 16.
    ((weighted_sum + ROUNDING) >> 4) as u8
}
/// Converts one YUV triple and stores it as R, G, B in the first three bytes
/// of `rgb`.
///
/// Any bytes after the third are left untouched.
///
/// # Panics
///
/// Panics if `rgb` holds fewer than three bytes.
#[inline]
fn set_pixel(rgb: &mut [u8], y: u8, u: u8, v: u8) {
    let (red, green, blue) = (yuv_to_r(y, v), yuv_to_g(y, u, v), yuv_to_b(y, u));
    rgb[0] = red;
    rgb[1] = green;
    rgb[2] = blue;
}
/// Converts planar YUV data with half-height chroma into interleaved RGB
/// without interpolating between chroma rows.
///
/// * `buffer` - output pixels, `BPP` bytes each, rows packed at `width * BPP`
///   bytes.
/// * `y_buffer` - luma plane with a row stride of `buffer_width`; the first
///   `width` samples of each row are used.
/// * `u_buffer`, `v_buffer` - chroma planes with a row stride of
///   `buffer_width / 2`; the first `chroma_width` samples of each row are used.
///
/// Every chroma row is paired with two consecutive luma rows. The per-row
/// conversion is delegated to `fill_rgba_row_simple`. Iteration stops as soon
/// as any of the four planes runs out of complete rows.
#[allow(unused)]
pub(crate) fn fill_rgb_buffer_simple<const BPP: usize>(
    buffer: &mut [u8],
    y_buffer: &[u8],
    u_buffer: &[u8],
    v_buffer: &[u8],
    width: usize,
    chroma_width: usize,
    buffer_width: usize,
) {
    let chroma_stride = buffer_width / 2;

    // Yield each chroma row twice so it lines up with two luma rows.
    let u_rows = u_buffer
        .chunks_exact(chroma_stride)
        .flat_map(|row| [row, row]);
    let v_rows = v_buffer
        .chunks_exact(chroma_stride)
        .flat_map(|row| [row, row]);

    let out_rows = buffer.chunks_exact_mut(width * BPP);
    let luma_rows = y_buffer.chunks_exact(buffer_width);

    for (((out_row, luma_row), u_row), v_row) in out_rows.zip(luma_rows).zip(u_rows).zip(v_rows) {
        // Slice off the stride padding before handing the rows over.
        fill_rgba_row_simple::<BPP>(
            &luma_row[..width],
            &u_row[..chroma_width],
            &v_row[..chroma_width],
            out_row,
        );
    }
}
/// Converts one row of YUV samples to RGB, reusing each chroma sample for two
/// horizontally adjacent pixels.
///
/// * `y_vec` - one luma sample per pixel.
/// * `u_vec`, `v_vec` - one chroma sample per pixel pair.
/// * `rgba` - output pixels of `BPP` bytes each; only the first three bytes of
///   every pixel are written.
///
/// A trailing unpaired pixel is converted with the next unused chroma sample
/// when the output has at least three bytes left for it.
fn fill_rgba_row_simple<const BPP: usize>(
    y_vec: &[u8],
    u_vec: &[u8],
    v_vec: &[u8],
    rgba: &mut [u8],
) {
    // Everything in the conversion that depends on chroma only, with the
    // constant offsets folded in: [red term, green term, blue term].
    let chroma_terms = |u: u8, v: u8| -> [i32; 3] {
        [
            mulhi(v, 26149) - 14234,
            8708 - mulhi(u, 6419) - mulhi(v, 13320),
            mulhi(u, 33050) - 17685,
        ]
    };
    // Adds the luma contribution to precomputed chroma terms and stores R, G, B.
    let write_px = |dst: &mut [u8], luma_sample: u8, terms: [i32; 3]| {
        let luma = mulhi(luma_sample, 19077);
        dst[0] = clip(luma + terms[0]);
        dst[1] = clip(luma + terms[1]);
        dst[2] = clip(luma + terms[2]);
    };

    let mut pixel_pairs = rgba.chunks_exact_mut(BPP * 2);
    let mut luma_pairs = y_vec.chunks_exact(2);
    let mut u_samples = u_vec.iter();
    let mut v_samples = v_vec.iter();

    // Two pixels per step: both share the same `u` and `v`, so the chroma
    // terms are computed once per pair.
    for (((pair, luma), &u), &v) in (&mut pixel_pairs)
        .zip(&mut luma_pairs)
        .zip(&mut u_samples)
        .zip(&mut v_samples)
    {
        let terms = chroma_terms(u, v);
        write_px(&mut pair[0..3], luma[0], terms);
        write_px(&mut pair[BPP..], luma[1], terms);
    }

    // Odd pixel count: one pixel remains and takes the next chroma sample.
    let tail = pixel_pairs.into_remainder();
    if tail.len() < 3 {
        return;
    }
    let leftovers = (
        luma_pairs.remainder().first(),
        u_samples.next(),
        v_samples.next(),
    );
    if let (Some(&y), Some(&u), Some(&v)) = leftovers {
        write_px(tail, y, chroma_terms(u, v));
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fancy upsampling of a 4x4 image must produce the same pixels as
    /// converting each pixel with hand-written, already upsampled chroma
    /// planes.
    #[test]
    fn test_fancy_grid() {
        const PIXELS: usize = 4 * 4;

        #[rustfmt::skip]
        let luma: [u8; PIXELS] = [
            77, 162, 202, 185,
            28, 13, 199, 182,
            135, 147, 164, 135,
            66, 27, 171, 130,
        ];
        // Half-resolution chroma planes, 2x2 each.
        #[rustfmt::skip]
        let half_res_u: [u8; 4] = [
            34, 101,
            123, 163,
        ];
        #[rustfmt::skip]
        let half_res_v: [u8; 4] = [
            97, 167,
            149, 23,
        ];

        let mut converted = [0u8; PIXELS * 3];
        fill_rgb_buffer_fancy::<3>(&mut converted, &luma, &half_res_u, &half_res_v, 4, 4, 4);

        // The same chroma planes after the 9:3:3:1 interpolation, one sample
        // per pixel.
        #[rustfmt::skip]
        let full_res_u: [u8; PIXELS] = [
            34, 51, 84, 101,
            56, 71, 101, 117,
            101, 112, 136, 148,
            123, 133, 153, 163,
        ];
        #[rustfmt::skip]
        let full_res_v: [u8; PIXELS] = [
            97, 115, 150, 167,
            110, 115, 126, 131,
            136, 117, 78, 59,
            149, 118, 55, 23,
        ];

        let mut expected = [0u8; PIXELS * 3];
        for (index, pixel) in expected.chunks_exact_mut(3).enumerate() {
            let (y, u, v) = (luma[index], full_res_u[index], full_res_v[index]);
            pixel[0] = yuv_to_r(y, v);
            pixel[1] = yuv_to_g(y, u, v);
            pixel[2] = yuv_to_b(y, u);
        }

        assert_eq!(converted, expected);
    }

    /// Pins the per-channel conversion for one sample triple, including a
    /// green value that saturates at 255.
    #[test]
    fn test_yuv_conversions() {
        let y: u8 = 203;
        let u: u8 = 40;
        let v: u8 = 42;

        assert_eq!(yuv_to_r(y, v), 80);
        assert_eq!(yuv_to_g(y, u, v), 255);
        assert_eq!(yuv_to_b(y, u), 40);
    }
}