use std::cmp::min;
use crate::video_frame::{VideoFrame, UV_PLANE, U_PLANE, V_PLANE, Y_PLANE};
use crate::DecodedFormat;
use byteorder::ByteOrder;
use byteorder::LittleEndian;
#[cfg(feature = "v4l2")]
use std::arch::aarch64::*;
pub const MM21_TILE_WIDTH: usize = 16;
pub const MM21_TILE_HEIGHT: usize = 32;
pub fn nv12_copy(
src_y: &[u8],
src_y_stride: usize,
dst_y: &mut [u8],
dst_y_stride: usize,
src_uv: &[u8],
src_uv_stride: usize,
dst_uv: &mut [u8],
dst_uv_stride: usize,
width: usize,
height: usize,
) {
for y in 0..height {
dst_y[(y * dst_y_stride)..(y * dst_y_stride + width)]
.copy_from_slice(&src_y[(y * src_y_stride)..(y * src_y_stride + width)]);
}
for y in 0..(height / 2) {
dst_uv[(y * dst_uv_stride)..(y * dst_uv_stride + width)]
.copy_from_slice(&src_uv[(y * src_uv_stride)..(y * src_uv_stride + width)]);
}
}
pub fn extend_border_nv12(
y_plane: &mut [u8],
uv_plane: &mut [u8],
visible_width: usize,
visible_height: usize,
coded_width: usize,
coded_height: usize,
) {
assert!(visible_width > 1);
assert!(visible_height > 1);
for y in 0..visible_height {
let row_start = y * coded_width;
for x in visible_width..coded_width {
y_plane[row_start + x] = y_plane[row_start + x - 1]
}
}
for y in visible_height..coded_height {
let (src, dst) = y_plane.split_at_mut(y * coded_width);
dst[0..coded_width].copy_from_slice(&src[((y - 1) * coded_width)..(y * coded_width)]);
}
for y in 0..(visible_height / 2) {
let row_start = y * coded_width;
for x in visible_width..coded_width {
uv_plane[row_start + x] = uv_plane[row_start + x - 2]
}
}
for y in (visible_height / 2)..(coded_height / 2) {
let (src, dst) = uv_plane.split_at_mut(y * coded_width);
dst[0..coded_width].copy_from_slice(&src[((y - 1) * coded_width)..(y * coded_width)]);
}
}
pub fn copy_plane(
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
width: usize,
height: usize,
) {
for y in 0..height {
dst[(y * dst_stride)..(y * dst_stride + width)]
.copy_from_slice(&src[(y * src_stride)..(y * src_stride + width)]);
}
}
pub fn i4xx_copy(
src_y: &[u8],
src_y_stride: usize,
dst_y: &mut [u8],
dst_y_stride: usize,
src_u: &[u8],
src_u_stride: usize,
dst_u: &mut [u8],
dst_u_stride: usize,
src_v: &[u8],
src_v_stride: usize,
dst_v: &mut [u8],
dst_v_stride: usize,
width: usize,
height: usize,
(sub_h, sub_v): (bool, bool),
) {
copy_plane(src_y, src_y_stride, dst_y, dst_y_stride, width, height);
let uv_width = if sub_h { (width + 1) / 2 } else { width };
let uv_height = if sub_v { (height + 1) / 2 } else { height };
copy_plane(src_u, src_u_stride, dst_u, dst_u_stride, uv_width, uv_height);
copy_plane(src_v, src_v_stride, dst_v, dst_v_stride, uv_width, uv_height);
}
pub fn y410_to_i410(
src: &[u8],
dst: &mut [u8],
width: usize,
height: usize,
strides: [usize; 3],
offsets: [usize; 3],
) {
let src_lines = src[offsets[0]..].chunks(strides[0]).map(|line| &line[..width * 4]);
let dst_y_size = width * 2 * height;
let dst_u_size = width * 2 * height;
let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
let dst_u_lines = dst_u_plane.chunks_mut(width * 2);
let dst_v_lines = dst_v_plane.chunks_mut(width * 2);
for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in
src_lines.zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines))).take(height)
{
for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(4).zip(
dst_y_line.chunks_mut(2).zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
) {
let y = LittleEndian::read_u16(&[src[1] >> 2 | src[2] << 6, src[2] >> 2 & 0b11]);
let u = LittleEndian::read_u16(&[src[0], src[1] & 0b11]);
let v = LittleEndian::read_u16(&[src[2] >> 4 | src[3] << 4, src[3] >> 4 & 0b11]);
LittleEndian::write_u16(dst_y, y);
LittleEndian::write_u16(dst_u, u);
LittleEndian::write_u16(dst_v, v);
}
}
}
#[cfg(feature = "v4l2")]
pub unsafe fn align_detile(src: *const u8, src_tile_stride: isize, dst: *mut u8, width: usize) {
let mut vin = [0u8; MM21_TILE_WIDTH];
let mut vout = [0u8; MM21_TILE_WIDTH];
let bytes_per_pixel = 1;
let mask = MM21_TILE_WIDTH - 1;
let remainder = width & mask;
let width_aligned_down = width & !mask;
if width_aligned_down > 0 {
detile_row(src, src_tile_stride, dst, width_aligned_down);
}
let index = (width_aligned_down / MM21_TILE_WIDTH * (src_tile_stride as usize)) as usize;
let input_slice =
std::slice::from_raw_parts(src.offset(index as isize), remainder * bytes_per_pixel);
(&mut vin[0..remainder * bytes_per_pixel])
.copy_from_slice(&input_slice[0..remainder * bytes_per_pixel]);
detile_row(vin.as_ptr(), src_tile_stride, vout.as_mut_ptr(), MM21_TILE_WIDTH);
let output_slice = std::slice::from_raw_parts_mut(
dst.offset(width_aligned_down as isize),
remainder * bytes_per_pixel,
);
output_slice[0..remainder * bytes_per_pixel]
.copy_from_slice(&vout[0..remainder * bytes_per_pixel]);
}
#[cfg(feature = "v4l2")]
pub unsafe fn detile_row(
mut src: *const u8,
src_tile_stride: isize,
mut dst: *mut u8,
width: usize,
) {
let mut w = width;
while w > 0 {
let v0: uint8x16_t = vld1q_u8(src);
src = src.offset(src_tile_stride as isize);
w = w - MM21_TILE_WIDTH;
vst1q_u8(dst, v0);
dst = dst.offset(MM21_TILE_WIDTH as isize);
}
}
#[cfg(feature = "v4l2")]
pub fn detile_plane(
src: &[u8],
src_stride: usize,
dst: &mut [u8],
mut dst_stride: isize,
width: usize,
mut height: isize,
tile_height: usize,
) -> Result<(), String> {
let src_tile_stride = (16 * tile_height) as isize;
if width == 0 || height == 0 || (((tile_height) & ((tile_height) - 1)) > 0) {
return Err("Invalid width, height, or tile height is not a power of 2.".to_owned());
}
let mut aligned = true;
if (width & (MM21_TILE_WIDTH - 1)) > 0 {
aligned = false;
}
if src.len() < (src_stride * (height.abs() as usize)) {
return Err("Src buffer not big enough.".to_owned());
}
if dst.len() < (dst_stride * height.abs()) as usize {
return Err("Dst buffer not big enough.".to_owned());
}
let mut src_ptr = src.as_ptr();
let mut dst_ptr = dst.as_mut_ptr();
if height < 0 {
height = -height;
unsafe {
src_ptr = src_ptr.offset(((height - 1) * dst_stride) as isize);
}
dst_stride = -dst_stride;
}
for y in 0..height {
unsafe {
if aligned {
detile_row(src_ptr, src_tile_stride, dst_ptr, width);
} else {
align_detile(src_ptr, src_tile_stride, dst_ptr, width);
}
}
unsafe {
dst_ptr = dst_ptr.offset(dst_stride as isize);
src_ptr = src_ptr.offset(MM21_TILE_WIDTH as isize);
}
if (y & (tile_height - 1) as isize) == ((tile_height - 1) as isize) {
unsafe {
src_ptr = src_ptr.offset(-src_tile_stride + (src_stride * tile_height) as isize);
}
}
}
Ok(())
}
#[cfg(feature = "v4l2")]
pub fn mm21_to_nv12(
src_y: &[u8],
src_stride_y: usize,
dst_y: &mut [u8],
dst_stride_y: usize,
src_uv: &[u8],
src_stride_uv: usize,
dst_uv: &mut [u8],
dst_stride_uv: usize,
width: usize,
height: isize,
) -> Result<(), String> {
if width <= 0 {
return Err("Width must be greater than 0.".to_owned());
}
let sign = if height < 0 { -1 } else { 1 };
detile_plane(
src_y,
src_stride_y,
dst_y,
dst_stride_y as isize,
width,
height,
MM21_TILE_HEIGHT,
)?;
detile_plane(
src_uv,
src_stride_uv,
dst_uv,
dst_stride_uv as isize,
(width + 1) & !1,
(height + sign) / 2,
MM21_TILE_HEIGHT / 2,
)
}
pub fn nv12_to_i420(
src_y: &[u8],
src_y_stride: usize,
dst_y: &mut [u8],
dst_y_stride: usize,
src_uv: &[u8],
src_uv_stride: usize,
dst_u: &mut [u8],
dst_u_stride: usize,
dst_v: &mut [u8],
dst_v_stride: usize,
width: usize,
height: usize,
) {
copy_plane(src_y, src_y_stride, dst_y, dst_y_stride, width, height);
let aligned_width = (width + 1) & (!1);
for y in 0..((height + 1) / 2) {
let src_row = &src_uv[(y * src_uv_stride)..(y * src_uv_stride + aligned_width)];
let dst_u_row = &mut dst_u[(y * dst_u_stride)..(y * dst_u_stride + aligned_width / 2)];
let dst_v_row = &mut dst_v[(y * dst_v_stride)..(y * dst_v_stride + aligned_width / 2)];
for x in 0..aligned_width {
if x % 2 == 0 {
dst_u_row[x / 2] = src_row[x];
} else {
dst_v_row[x / 2] = src_row[x];
}
}
}
}
pub fn i420_to_nv12_chroma(src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
for i in 0..dst_uv.len() {
if i % 2 == 0 {
dst_uv[i] = src_u[i / 2];
} else {
dst_uv[i] = src_v[i / 2];
}
}
}
pub fn i420_to_nv12(src_y: &[u8], dst_y: &mut [u8], src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
dst_y.copy_from_slice(src_y);
i420_to_nv12_chroma(src_u, src_v, dst_uv);
}
pub const SUPPORTED_CONVERSION: &'static [(DecodedFormat, DecodedFormat)] = &[
#[cfg(feature = "v4l2")]
(DecodedFormat::MM21, DecodedFormat::NV12),
(DecodedFormat::NV12, DecodedFormat::NV12),
(DecodedFormat::I420, DecodedFormat::I420),
(DecodedFormat::I422, DecodedFormat::I422),
(DecodedFormat::I444, DecodedFormat::I444),
];
pub fn convert_video_frame(src: &impl VideoFrame, dst: &mut impl VideoFrame) -> Result<(), String> {
let width = min(dst.resolution().width, src.resolution().width) as usize;
let height = min(dst.resolution().height, src.resolution().height) as usize;
let conversion = (src.decoded_format()?, dst.decoded_format()?);
let src_pitches = src.get_plane_pitch();
let src_mapping = src.map().expect("Image processor src mapping failed!");
let src_planes = src_mapping.get();
let dst_pitches = dst.get_plane_pitch();
let dst_mapping = dst.map_mut().expect("Image processor dst mapping failed!");
let dst_planes = dst_mapping.get();
match conversion {
#[cfg(feature = "v4l2")]
(DecodedFormat::MM21, DecodedFormat::NV12) => mm21_to_nv12(
src_planes[Y_PLANE],
src_pitches[Y_PLANE],
*dst_planes[Y_PLANE].borrow_mut(),
dst_pitches[Y_PLANE],
src_planes[UV_PLANE],
src_pitches[UV_PLANE],
*dst_planes[UV_PLANE].borrow_mut(),
dst_pitches[UV_PLANE],
width,
height as isize,
),
(DecodedFormat::NV12, DecodedFormat::NV12) => {
nv12_copy(
src_planes[Y_PLANE],
src_pitches[Y_PLANE],
*dst_planes[Y_PLANE].borrow_mut(),
dst_pitches[Y_PLANE],
src_planes[UV_PLANE],
src_pitches[UV_PLANE],
*dst_planes[UV_PLANE].borrow_mut(),
dst_pitches[UV_PLANE],
width,
height,
);
Ok(())
}
(DecodedFormat::I420, DecodedFormat::I420) => {
i4xx_copy(
src_planes[Y_PLANE],
src_pitches[Y_PLANE],
*dst_planes[Y_PLANE].borrow_mut(),
dst_pitches[Y_PLANE],
src_planes[U_PLANE],
src_pitches[U_PLANE],
*dst_planes[U_PLANE].borrow_mut(),
dst_pitches[U_PLANE],
src_planes[V_PLANE],
src_pitches[V_PLANE],
*dst_planes[V_PLANE].borrow_mut(),
dst_pitches[V_PLANE],
width,
height,
(true, true),
);
Ok(())
}
(DecodedFormat::I422, DecodedFormat::I422) => {
i4xx_copy(
src_planes[Y_PLANE],
src_pitches[Y_PLANE],
*dst_planes[Y_PLANE].borrow_mut(),
dst_pitches[Y_PLANE],
src_planes[U_PLANE],
src_pitches[U_PLANE],
*dst_planes[U_PLANE].borrow_mut(),
dst_pitches[U_PLANE],
src_planes[V_PLANE],
src_pitches[V_PLANE],
*dst_planes[V_PLANE].borrow_mut(),
dst_pitches[V_PLANE],
width,
height,
(true, false),
);
Ok(())
}
(DecodedFormat::I444, DecodedFormat::I444) => {
i4xx_copy(
src_planes[Y_PLANE],
src_pitches[Y_PLANE],
*dst_planes[Y_PLANE].borrow_mut(),
dst_pitches[Y_PLANE],
src_planes[U_PLANE],
src_pitches[U_PLANE],
*dst_planes[U_PLANE].borrow_mut(),
dst_pitches[U_PLANE],
src_planes[V_PLANE],
src_pitches[V_PLANE],
*dst_planes[V_PLANE].borrow_mut(),
dst_pitches[V_PLANE],
width,
height,
(false, false),
);
Ok(())
}
_ => Err(format!("Unsupported conversion {:?} -> {:?}", conversion.0, conversion.1)),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
#[cfg(feature = "v4l2")]
fn test_mm21_to_nv12() {
let test_input = include_bytes!("test_data/puppets-480x270_20230825.mm21.yuv");
let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
let mut test_output = [0u8; 480 * 288 * 3 / 2];
let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
mm21_to_nv12(
&test_input[0..480 * 288],
480,
test_y_output,
480,
&test_input[480 * 288..480 * 288 * 3 / 2],
480,
test_uv_output,
480,
480,
288,
)
.expect("Failed to detile!");
assert_eq!(test_output, *test_expected_output);
}
#[test]
fn test_nv12_to_i420() {
let test_input = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");
let mut test_output = [0u8; 480 * 288 * 3 / 2];
let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
let (test_u_output, test_v_output) = test_uv_output.split_at_mut(480 * 288 / 4);
nv12_to_i420(
&test_input[0..480 * 288],
test_y_output,
&test_input[480 * 288..480 * 288 * 3 / 2],
test_u_output,
test_v_output,
);
assert_eq!(test_output, *test_expected_output);
}
#[test]
fn test_i420_to_nv12() {
let test_input = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");
let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
let mut test_output = [0u8; 480 * 288 * 3 / 2];
let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
i420_to_nv12(
&test_input[0..(480 * 288)],
test_y_output,
&test_input[(480 * 288)..(480 * 288 * 5 / 4)],
&test_input[(480 * 288 * 5 / 4)..(480 * 288 * 3 / 2)],
test_uv_output,
);
assert_eq!(test_output, *test_expected_output);
}
}