use anyhow::{bail, Result};
use std::ptr;
use crate::frame::VideoFrame;
use super::buffers::NvEncLockInputBuffer;
use super::constants::{NV_ENC_LOCK_INPUT_BUFFER_VER, NV_ENC_SUCCESS};
use super::session::EncodeSession;
pub(super) unsafe fn upload_frame(
session: &EncodeSession,
frame: &VideoFrame,
slot: usize,
) -> Result<u32> {
unsafe {
let input_buffer = session.input_buffers[slot];
let mut lock: NvEncLockInputBuffer = std::mem::zeroed();
lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
lock.input_buffer = input_buffer;
let rc = (session.fn_lock_input_buffer)(session.encoder, &mut lock);
if rc != NV_ENC_SUCCESS {
bail!("NvEncLockInputBuffer failed: {rc}");
}
let pitch = lock.pitch as usize;
let w = session.width as usize;
let h = session.height as usize;
let cw = w.div_ceil(2);
let ch = h.div_ceil(2);
let y_size = w * h;
let uv_size = cw * ch;
if frame.data.len() < y_size + 2 * uv_size {
(session.fn_unlock_input_buffer)(session.encoder, input_buffer);
bail!("frame data too small for {}x{} YUV420p", w, h);
}
let dst = lock.buffer_data_ptr as *mut u8;
for row in 0..h {
let src = frame.data.as_ptr().add(row * w);
let dst_row = dst.add(row * pitch);
ptr::copy_nonoverlapping(src, dst_row, w);
}
let chroma_pitch = pitch / 2;
let u_dst_base = dst.add(pitch * h);
let u_src_base = frame.data.as_ptr().add(y_size);
for row in 0..ch {
let src = u_src_base.add(row * cw);
let dst_row = u_dst_base.add(row * chroma_pitch);
ptr::copy_nonoverlapping(src, dst_row, cw);
}
let v_dst_base = u_dst_base.add(chroma_pitch * ch);
let v_src_base = u_src_base.add(uv_size);
for row in 0..ch {
let src = v_src_base.add(row * cw);
let dst_row = v_dst_base.add(row * chroma_pitch);
ptr::copy_nonoverlapping(src, dst_row, cw);
}
let rc = (session.fn_unlock_input_buffer)(session.encoder, input_buffer);
if rc != NV_ENC_SUCCESS {
bail!("NvEncUnlockInputBuffer failed: {rc}");
}
Ok(lock.pitch)
}
}
pub(super) unsafe fn upload_frame_10bit(
session: &EncodeSession,
frame: &VideoFrame,
slot: usize,
) -> Result<u32> {
unsafe {
let input_buffer = session.input_buffers[slot];
let mut lock: NvEncLockInputBuffer = std::mem::zeroed();
lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
lock.input_buffer = input_buffer;
let rc = (session.fn_lock_input_buffer)(session.encoder, &mut lock);
if rc != NV_ENC_SUCCESS {
bail!("NvEncLockInputBuffer failed: {rc}");
}
let pitch_bytes = lock.pitch as usize;
let w = session.width as usize;
let h = session.height as usize;
let cw = w.div_ceil(2);
let ch = h.div_ceil(2);
let y_bytes = w * h * 2;
let uv_bytes = cw * ch * 2;
if frame.data.len() < y_bytes + 2 * uv_bytes {
(session.fn_unlock_input_buffer)(session.encoder, input_buffer);
bail!(
"frame data too small for {}x{} Yuv420p10le: need {} bytes, got {}",
w,
h,
y_bytes + 2 * uv_bytes,
frame.data.len()
);
}
let dst = lock.buffer_data_ptr as *mut u8;
let src_ptr = frame.data.as_ptr();
for row in 0..h {
let src_row = src_ptr.add(row * w * 2) as *const u16;
let dst_row = dst.add(row * pitch_bytes) as *mut u16;
for col in 0..w {
let sample = (*src_row.add(col)) & 0x03FF;
*dst_row.add(col) = sample << 6;
}
}
let uv_dst_base = dst.add(pitch_bytes * h);
let u_src_base = src_ptr.add(y_bytes) as *const u16;
let v_src_base = src_ptr.add(y_bytes + uv_bytes) as *const u16;
for row in 0..ch {
let u_src_row = u_src_base.add(row * cw);
let v_src_row = v_src_base.add(row * cw);
let dst_row = uv_dst_base.add(row * pitch_bytes) as *mut u16;
for col in 0..cw {
let u = (*u_src_row.add(col)) & 0x03FF;
let v = (*v_src_row.add(col)) & 0x03FF;
*dst_row.add(col * 2) = u << 6;
*dst_row.add(col * 2 + 1) = v << 6;
}
}
let rc = (session.fn_unlock_input_buffer)(session.encoder, input_buffer);
if rc != NV_ENC_SUCCESS {
bail!("NvEncUnlockInputBuffer failed: {rc}");
}
Ok(lock.pitch)
}
}