use anyhow::{Context, Result, bail};
use bytes::Bytes;
use std::collections::VecDeque;
use std::ffi::c_void;
use std::os::raw::{c_int, c_uchar, c_uint, c_ulong, c_ulonglong};
use std::panic::{AssertUnwindSafe, catch_unwind};
use std::ptr;
use std::sync::{Arc, Mutex};
use super::Decoder;
use crate::frame::{ColorMetadata, ColorSpace, PixelFormat, StreamInfo, TransferFn, VideoFrame};
/// Typed reasons for rejecting a stream on the NVDEC path.
///
/// The `Display` text of every variant starts with `"NVDEC reject: "`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NvdecError {
    /// The stream's chroma format is something other than 4:2:0.
    UnsupportedChroma {
        /// Raw chroma format code as reported for the stream.
        chroma_format: c_int,
        /// Human-readable name for `chroma_format`.
        label: &'static str,
        /// Coded width of the rejected stream.
        width: u32,
        /// Coded height of the rejected stream.
        height: u32,
    },
    /// The luma bit depth is outside the accepted range.
    UnsupportedPixelFormat { bit_depth: u8 },
    /// The GPU capability query rejected the stream; `reason` is free-form text.
    UnsupportedByHardware { reason: String },
}

impl std::fmt::Display for NvdecError {
    /// Renders the single-line rejection message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnsupportedByHardware { reason } => {
                f.write_fmt(format_args!("NVDEC reject: GPU capability — {reason}"))
            }
            Self::UnsupportedPixelFormat { bit_depth } => f.write_fmt(format_args!(
                "NVDEC reject: {}-bit content — only 8/10/12-bit 4:2:0 supported",
                bit_depth
            )),
            Self::UnsupportedChroma {
                chroma_format,
                label,
                width,
                height,
            } => f.write_fmt(format_args!(
                "NVDEC reject: chroma_format={} ({}) at {}x{} — only 4:2:0 supported",
                chroma_format, label, width, height
            )),
        }
    }
}

impl std::error::Error for NvdecError {}
// Hand-written aliases for the CUDA driver types this file needs. The driver is
// loaded at runtime through `libloading`, so no generated bindings are used.
// Status code returned by the driver entry points; callers in this file treat 0 as success.
type CUresult = c_int;
// Device ordinal handle filled in by `cuDeviceGet`.
type CUdevice = c_int;
// Opaque context handle.
type CUcontext = *mut c_void;
// Device address; used below for mapped decode surfaces.
type CUdeviceptr = c_ulonglong;
// Signatures of the driver symbols resolved by name in `new_with_pts`.
type FnCuInit = unsafe extern "C" fn(c_uint) -> CUresult;
type FnCuDeviceGet = unsafe extern "C" fn(*mut CUdevice, c_int) -> CUresult;
type FnCuCtxCreate = unsafe extern "C" fn(*mut CUcontext, c_uint, CUdevice) -> CUresult;
type FnCuCtxDestroy = unsafe extern "C" fn(CUcontext) -> CUresult;
type FnCuCtxPushCurrent = unsafe extern "C" fn(CUcontext) -> CUresult;
type FnCuCtxPopCurrent = unsafe extern "C" fn(*mut CUcontext) -> CUresult;
type FnCuMemcpy2D = unsafe extern "C" fn(*const CudaMemcpy2D) -> CUresult;
// Values written to `CudaMemcpy2D::{src,dst}_memory_type` by `display_callback`.
const CU_MEMORYTYPE_HOST: c_uint = 1;
const CU_MEMORYTYPE_DEVICE: c_uint = 2;
/// 2D copy descriptor passed by pointer to the function loaded as `cuMemcpy2D_v2`.
///
/// `display_callback` zero-initialises it and then fills only the memory types,
/// the source device pointer/pitch, the destination host pointer/pitch and the
/// copy extent. NOTE(review): presumably mirrors `CUDA_MEMCPY2D` from `cuda.h` —
/// confirm field order against the header before changing anything here.
#[repr(C)]
struct CudaMemcpy2D {
src_x_in_bytes: usize,
src_y: usize,
// One of the `CU_MEMORYTYPE_*` constants.
src_memory_type: c_uint,
src_host: *const c_void,
src_device: CUdeviceptr,
src_array: *const c_void,
// Bytes between consecutive source rows.
src_pitch: usize,
dst_x_in_bytes: usize,
dst_y: usize,
// One of the `CU_MEMORYTYPE_*` constants.
dst_memory_type: c_uint,
dst_host: *mut c_void,
dst_device: CUdeviceptr,
dst_array: *const c_void,
// Bytes between consecutive destination rows.
dst_pitch: usize,
// Extent of the copied rectangle: bytes per row and number of rows.
width_in_bytes: usize,
height: usize,
}
// Opaque handles produced by `cuvidCreateVideoParser` / `cuvidCreateDecoder`.
type CUvideoparser = *mut c_void;
type CUvideodecoder = *mut c_void;
/// Stream format record handed to `sequence_callback` by the parser.
///
/// This file only ever reads it through the pointer the parser supplies.
/// NOTE(review): presumably mirrors `CUVIDEOFORMAT` from `nvcuvid.h` — confirm
/// the layout of everything after `video_signal_description` before relying on
/// `seqhdr_data_length`; that field is not read anywhere in the visible code.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct CuVideoFormat {
/// Codec id; compared against the codec the parser was created for.
pub codec: c_int,
pub frame_rate_num: c_uint,
pub frame_rate_den: c_uint,
/// Non-zero for progressive content; drives the deinterlace mode choice.
pub progressive_sequence: u8,
/// Luma bit depth minus 8; any value above 0 selects the 16-bit output format.
pub bit_depth_luma_minus8: u8,
pub bit_depth_chroma_minus8: u8,
/// Parser's minimum surface count; clamped to 20..=32 by `sequence_callback`.
pub min_num_decode_surfaces: u8,
pub coded_width: c_uint,
pub coded_height: c_uint,
pub display_area_left: c_int,
pub display_area_top: c_int,
pub display_area_right: c_int,
pub display_area_bottom: c_int,
/// Chroma format code; only the 4:2:0 code passes `validate_format`.
pub chroma_format: c_int,
pub bitrate: c_uint,
pub display_aspect_num: c_int,
pub display_aspect_den: c_int,
/// Colour signalling bytes. `sequence_callback` reads bit 3 of byte 0 as the
/// full-range flag and bytes 1, 2, 3 as colour primaries, transfer
/// characteristics and matrix coefficients.
pub video_signal_description: [u8; 8],
pub seqhdr_data_length: c_uint,
/// Padding so the Rust type is at least as large as whatever the driver hands over.
pub _reserved_tail: [u8; 1024],
}
/// Parameters passed to `cuvidCreateVideoParser`.
///
/// `new_with_pts` zero-initialises the struct and then sets the codec, surface
/// count, clock rate, error threshold, display delay, `reserved1[0]`, the user
/// data pointer and four of the five callbacks (`pfn_get_sei_msg` stays `None`).
/// The size is pinned to 136 bytes by a compile-time assertion further down.
#[repr(C)]
struct CuVideoParserParams {
codec_type: c_int,
max_num_decode_surfaces: c_uint,
clock_rate: c_uint,
error_threshold: c_uint,
max_display_delay: c_uint,
// NOTE(review): `new_with_pts` writes 1 into element 0; presumably that word
// is a flags bitfield in the C header — confirm which bit is being set.
reserved1: [c_uint; 5],
// Handed back verbatim as the first argument of every callback.
user_data: *mut c_void,
pfn_sequence_callback: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoFormat) -> c_int>,
pfn_decode_picture: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoPicParams) -> c_int>,
pfn_display_picture: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoDispInfo) -> c_int>,
pfn_get_operating_point: Option<unsafe extern "C" fn(*mut c_void, *mut c_void) -> c_int>,
pfn_get_sei_msg: Option<unsafe extern "C" fn(*mut c_void, *mut c_void) -> c_int>,
reserved2: [*mut c_void; 5],
ext_video_info: *mut c_void,
}
/// One bitstream packet fed to `cuvidParseVideoData`.
///
/// Uses `c_ulong`, so the size differs by platform (24 bytes on Windows, 32 on
/// 64-bit Linux per the assertions below).
#[repr(C)]
struct CuVideoSourceDataPacket {
// Combination of the `CUVID_PKT_*` flags.
flags: c_ulong,
// Number of bytes readable at `payload`.
payload_size: c_ulong,
// Borrowed pointer into the caller's sample buffer; null for the end-of-stream packet.
payload: *const u8,
// Presentation timestamp; only set together with `CUVID_PKT_TIMESTAMP`.
timestamp: c_ulonglong,
}
/// Parameters passed to `cuvidCreateDecoder` from `sequence_callback`.
///
/// The struct is zero-initialised before use, so every field not explicitly
/// assigned there (display area, target rect, `vid_lock`, histogram, ...) is 0/null.
#[repr(C)]
struct CuVideoDecodeCreateInfo {
// NOTE(review): spelled `code_width` although its sibling is `coded_height`;
// renaming requires touching `sequence_callback` in the same change.
code_width: c_ulong,
coded_height: c_ulong,
num_decode_surfaces: c_ulong,
codec_type: c_int,
chroma_format: c_int,
creation_flags: c_ulong,
bit_depth_minus8: c_ulong,
intra_decode_only: c_ulong,
max_width: c_ulong,
max_height: c_ulong,
reserved1: c_ulong,
display_area_left: i16,
display_area_top: i16,
display_area_right: i16,
display_area_bottom: i16,
// One of the `CUVID_FMT_*` constants.
output_format: c_int,
// 0, 1 or 2 as chosen in `sequence_callback`.
deinterlace_mode: c_int,
target_width: c_ulong,
target_height: c_ulong,
num_output_surfaces: c_ulong,
vid_lock: *mut c_void,
target_rect_left: i16,
target_rect_top: i16,
target_rect_right: i16,
target_rect_bottom: i16,
enable_histogram: c_ulong,
reserved2: [c_ulong; 4],
}
/// Per-picture decode parameters produced by the parser.
///
/// `decode_callback` forwards the pointer untouched to `cuvidDecodePicture`;
/// no field is read or written by this file. Size is pinned to 4280 bytes below.
#[repr(C)]
struct CuVideoPicParams {
pic_width_in_mbs: c_int,
pic_height_in_mbs: c_int,
curr_pic_idx: c_int,
field_pic_flag: c_int,
bottom_field_flag: c_int,
second_field: c_int,
n_bitstream_data_len: c_uint,
p_bitstream_data: *const u8,
n_num_slices: c_uint,
p_slice_data_offsets: *const c_uint,
ref_pic_flag: c_int,
intra_pic_flag: c_int,
reserved: [c_uint; 30],
// Opaque 4096-byte codec-specific area; the `*PicParamsShape` structs below
// are asserted to fit inside it.
codec_specific: [c_uint; 1024],
}
/// Display-order notification handed to `display_callback`.
#[repr(C)]
struct CuVideoDispInfo {
// Surface index to map; `display_callback` rejects negative values.
picture_index: c_int,
progressive_frame: c_int,
top_field_first: c_int,
repeat_first_field: c_int,
// Copied into `DecodedFrame::timestamp`.
timestamp: c_ulonglong,
}
/// Layout sketch of one H.264 DPB entry (seven `c_int`s, 28 bytes).
///
/// Only used for the compile-time size checks; marked `dead_code` because no
/// field is accessed in the visible part of this file.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoH264DpbEntry {
pic_idx: c_int,
frame_idx: c_int,
is_long_term: c_int,
not_existing: c_int,
used_for_reference: c_int,
field_order_cnt: [c_int; 2],
}
// A single entry is 28 bytes, so the 16-entry DPB array is 448 bytes.
const _: () = assert!(std::mem::size_of::<CuVideoH264DpbEntry>() == 28);
const _: () = assert!(std::mem::size_of::<[CuVideoH264DpbEntry; 16]>() == 448);
/// Approximate shape of the H.264 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoH264PicParamsShape {
sps_pps_scalars: [c_int; 32],
dpb: [CuVideoH264DpbEntry; 16],
weight_scale_4x4: [[u8; 16]; 6],
weight_scale_8x8: [[u8; 64]; 2],
fmo_aso_extras: [u8; 256],
reserved_tail: [u8; 1024],
}
const _: () = assert!(std::mem::size_of::<CuVideoH264PicParamsShape>() <= 4096);
/// Approximate shape of the HEVC codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoHevcPicParamsShape {
sps_pps_scalars: [c_int; 64],
ref_pic_idx: [c_int; 16],
pic_order_cnt_val: [c_int; 16],
is_long_term: [c_uchar; 16],
rps_sets: [[c_uchar; 8]; 3],
scaling_list_4x4: [[c_uchar; 16]; 6],
scaling_list_8x8: [[c_uchar; 64]; 6],
scaling_list_16x16: [[c_uchar; 64]; 6],
scaling_list_32x32: [[c_uchar; 64]; 2],
scaling_list_dc_16x16: [c_uchar; 6],
scaling_list_dc_32x32: [c_uchar; 2],
reserved_tail: [u8; 256],
}
const _: () = assert!(std::mem::size_of::<CuVideoHevcPicParamsShape>() <= 4096);
/// Approximate shape of the AV1 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoAv1PicParamsShape {
seq_header_scalars: [c_int; 32],
ref_frame_map: [c_int; 8],
tile_col_start_sb: [c_int; 64],
tile_row_start_sb: [c_int; 64],
loop_filter: [c_int; 16],
film_grain: [u8; 512],
reserved_tail: [u8; 256],
}
const _: () = assert!(std::mem::size_of::<CuVideoAv1PicParamsShape>() <= 4096);
/// Approximate shape of the VP9 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoVp9PicParamsShape {
profile_and_scalars: [c_int; 32],
ref_frame_map: [c_int; 8],
probs: [u8; 384],
reserved_tail: [u8; 128],
}
const _: () = assert!(std::mem::size_of::<CuVideoVp9PicParamsShape>() <= 4096);
/// Approximate shape of the VP8 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoVp8PicParamsShape {
profile_and_scalars: [c_int; 16],
last_ref: c_int,
golden_ref: c_int,
alt_ref: c_int,
tables: [u8; 256],
reserved_tail: [u8; 64],
}
const _: () = assert!(std::mem::size_of::<CuVideoVp8PicParamsShape>() <= 4096);
/// Approximate shape of the MPEG-2 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoMpeg2PicParamsShape {
forward_ref_pic_idx: c_int,
backward_ref_pic_idx: c_int,
picture_coding_type: c_int,
full_pel_forward_vector: c_int,
full_pel_backward_vector: c_int,
f_code: [[c_int; 2]; 2],
intra_dc_precision: c_int,
frame_pred_frame_dct: c_int,
concealment_motion_vectors: c_int,
q_scale_type: c_int,
intra_vlc_format: c_int,
alternate_scan: c_int,
top_field_first: c_int,
quant_matrix_intra: [c_uchar; 64],
quant_matrix_inter: [c_uchar; 64],
reserved_tail: [u8; 32],
}
const _: () = assert!(std::mem::size_of::<CuVideoMpeg2PicParamsShape>() <= 4096);
/// Approximate shape of the MPEG-4 codec-specific picture parameters.
///
/// Exists only so the assertion below can verify that it fits in the
/// 4096-byte `CuVideoPicParams::codec_specific` area.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoMpeg4PicParamsShape {
forward_ref_pic_idx: c_int,
backward_ref_pic_idx: c_int,
vop_time_increment_resolution: c_int,
vop_coding_type: c_int,
interlaced: c_int,
quant_type: c_int,
quarter_sample: c_int,
short_video_header: c_int,
divx_flags: c_int,
top_field_first: c_int,
rounding_control: c_int,
alternate_vertical_scan_flag: c_int,
quant_matrix_intra: [c_uchar; 64],
quant_matrix_inter: [c_uchar; 64],
reserved_tail: [u8; 32],
}
const _: () = assert!(std::mem::size_of::<CuVideoMpeg4PicParamsShape>() <= 4096);
/// Post-processing parameters passed to the frame-mapping call.
///
/// `display_callback` zero-initialises it and sets only the first four fields
/// (`second_field` and `unpaired_field` are always written as 0). Size is
/// pinned to 264 bytes by an assertion below.
#[repr(C)]
struct CuVideoProcParams {
progressive_frame: c_int,
second_field: c_int,
top_field_first: c_int,
unpaired_field: c_int,
reserved_flags: c_uint,
reserved_zero: c_uint,
raw_input_dptr: c_ulonglong,
raw_input_pitch: c_uint,
raw_input_format: c_uint,
raw_output_dptr: c_ulonglong,
raw_output_pitch: c_uint,
reserved1: c_uint,
// Left null by `display_callback`.
output_stream: *mut c_void,
reserved: [c_uint; 46],
histogram_dptr: *mut c_void,
reserved2: [*mut c_void; 1],
}
// Compile-time layout checks for the hand-written FFI structs. A mismatch
// fails the build instead of corrupting memory at runtime.
const _: () = assert!(std::mem::size_of::<CuVideoParserParams>() == 136);
// Lower bound only: the Rust type carries an extra reserved tail.
const _: () = assert!(std::mem::size_of::<CuVideoFormat>() >= 72);
const _: () = assert!(std::mem::size_of::<CuVideoPicParams>() == 4280);
// Size of `CuVideoPicParams::codec_specific`, the budget the `*PicParamsShape` checks use.
const _: () = assert!(std::mem::size_of::<[c_uint; 1024]>() == 4096);
const _: () = assert!(std::mem::size_of::<CuVideoDispInfo>() == 24);
// `c_ulong` width differs between Windows and 64-bit Linux, hence the per-target sizes.
#[cfg(target_os = "windows")]
const _: () = assert!(std::mem::size_of::<CuVideoSourceDataPacket>() == 24);
#[cfg(all(target_os = "linux", target_pointer_width = "64"))]
const _: () = assert!(std::mem::size_of::<CuVideoSourceDataPacket>() == 32);
// NOTE(review): there is no Linux counterpart for this check — consider adding one.
#[cfg(target_os = "windows")]
const _: () = assert!(std::mem::size_of::<CuVideoDecodeCreateInfo>() == 112);
const _: () = assert!(std::mem::size_of::<CuVideoProcParams>() == 264);
// Signatures of the cuvid entry points that `new_with_pts` resolves by name at
// runtime. All of them return a `CUresult`, which callers compare against 0.
type FnCuvidCreateVideoParser =
unsafe extern "C" fn(*mut CUvideoparser, *mut CuVideoParserParams) -> CUresult;
type FnCuvidParseVideoData =
unsafe extern "C" fn(CUvideoparser, *mut CuVideoSourceDataPacket) -> CUresult;
type FnCuvidDestroyVideoParser = unsafe extern "C" fn(CUvideoparser) -> CUresult;
type FnCuvidCreateDecoder =
unsafe extern "C" fn(*mut CUvideodecoder, *mut CuVideoDecodeCreateInfo) -> CUresult;
type FnCuvidDestroyDecoder = unsafe extern "C" fn(CUvideodecoder) -> CUresult;
type FnCuvidDecodePicture = unsafe extern "C" fn(CUvideodecoder, *mut CuVideoPicParams) -> CUresult;
// Arguments: decoder, picture index, out device pointer, out pitch, processing parameters.
type FnCuvidMapVideoFrame = unsafe extern "C" fn(
CUvideodecoder,
c_int,
*mut CUdeviceptr,
*mut c_uint,
*mut CuVideoProcParams,
) -> CUresult;
type FnCuvidUnmapVideoFrame = unsafe extern "C" fn(CUvideodecoder, CUdeviceptr) -> CUresult;
type FnCuvidGetDecoderCaps = unsafe extern "C" fn(*mut CuVideoDecodeCaps) -> CUresult;
/// Capability query record passed by pointer to `cuvidGetDecoderCaps`.
///
/// The caller fills `codec_type`, `chroma_format` and `bit_depth_minus8`; the
/// driver writes the remaining fields. The driver writes through the pointer
/// for the full size of its own struct, so this type must never be smaller
/// than the C definition. The trailing reserved array is sized to match the
/// SDK's `CUVIDDECODECAPS` (88 bytes); the previous 80-byte layout left the
/// last two reserved words outside the Rust allocation.
#[repr(C)]
#[derive(Clone, Copy)]
struct CuVideoDecodeCaps {
    // Inputs.
    codec_type: c_int,
    chroma_format: c_int,
    bit_depth_minus8: u32,
    reserved1: [u32; 3],
    // Outputs.
    is_supported: u8,
    num_nvdecs: u8,
    output_format_mask: u16,
    max_width: u32,
    max_height: u32,
    max_mb_count: u32,
    min_width: u16,
    min_height: u16,
    // NOTE(review): the SDK header uses these four bytes for histogram
    // capability fields rather than an output-surface count — confirm before
    // reading them. The names are kept so existing references still compile.
    num_output_surfaces: u8,
    reserved2: [u8; 3],
    reserved3: [u32; 10],
}
const _: () = assert!(std::mem::size_of::<CuVideoDecodeCaps>() == 88);
// Codec ids, in the numeric order of the SDK's `cudaVideoCodec` enum
// (MPEG1 = 0, MPEG2 = 1, MPEG4 = 2, VC1 = 3, H264 = 4, ...).
const CUVID_MPEG2: c_int = 1;
// Was 3, which is the VC1 slot; MPEG-4 Part 2 is 2.
const CUVID_MPEG4: c_int = 2;
const CUVID_H264: c_int = 4;
const CUVID_HEVC: c_int = 8;
const CUVID_VP8: c_int = 9;
const CUVID_VP9: c_int = 10;
const CUVID_AV1: c_int = 11;
// Flags for `CuVideoSourceDataPacket::flags`.
const CUVID_PKT_ENDOFSTREAM: c_ulong = 1;
const CUVID_PKT_TIMESTAMP: c_ulong = 2;
// Output surface formats: 8-bit NV12 and 16-bit P016.
const CUVID_FMT_NV12: c_int = 0;
const CUVID_FMT_P016: c_int = 1;
// Chroma format code for 4:2:0.
const CUVID_CHROMA_420: c_int = 1;
// Decoder creation flag selecting the dedicated video engine. Was 0x01, which
// is the "prefer CUDA" value in `cudaVideoCreateFlags`; "prefer CUVID" is 0x04.
const CUVID_CREATE_PREFER_CUVID: c_ulong = 0x04;
/// Looks up the cuvid codec id for a codec name or fourcc-style tag.
///
/// Matching is exact and case-sensitive; unknown names yield `None`.
fn codec_to_cuvid(codec: &str) -> Option<c_int> {
    // Each row lists the accepted spellings for one codec id.
    const ALIASES: &[(&[&str], c_int)] = &[
        (&["h264", "avc1", "avc"], CUVID_H264),
        (&["h265", "hevc", "hvc1", "hev1"], CUVID_HEVC),
        (&["vp8"], CUVID_VP8),
        (&["vp9", "vp09"], CUVID_VP9),
        (&["av1", "av01"], CUVID_AV1),
        (&["mpeg2", "mpeg2video"], CUVID_MPEG2),
        (&["mpeg4", "mp4v"], CUVID_MPEG4),
    ];
    ALIASES
        .iter()
        .find(|(names, _)| names.contains(&codec))
        .map(|&(_, id)| id)
}
/// Checks whether a stream's chroma format and luma bit depth are accepted.
///
/// Returns `None` when the stream is acceptable, otherwise the typed rejection:
/// * any `chroma_format` other than the 4:2:0 code yields
///   [`NvdecError::UnsupportedChroma`], carrying the coded dimensions;
/// * a luma bit depth above 12 yields [`NvdecError::UnsupportedPixelFormat`].
///
/// The chroma check runs first, so a stream failing both reports the chroma error.
pub fn validate_format(
    chroma_format: c_int,
    bit_depth_luma_minus8: u8,
    coded_width: u32,
    coded_height: u32,
) -> Option<NvdecError> {
    if chroma_format != CUVID_CHROMA_420 {
        let label: &'static str = match chroma_format {
            0 => "Monochrome",
            2 => "4:2:2",
            3 => "4:4:4",
            _ => "unknown",
        };
        return Some(NvdecError::UnsupportedChroma {
            chroma_format,
            label,
            width: coded_width,
            height: coded_height,
        });
    }
    // Saturate rather than add: a garbage header value >= 248 would otherwise
    // panic in debug builds and wrap to a small, "valid" depth in release builds.
    let bit_depth = bit_depth_luma_minus8.saturating_add(8);
    if bit_depth > 12 {
        return Some(NvdecError::UnsupportedPixelFormat { bit_depth });
    }
    None
}
/// Converts a tightly packed semi-planar 16-bit frame into planar 10-bit
/// little-endian YUV 4:2:0.
///
/// Input layout: `w * h` little-endian 16-bit luma samples followed by
/// interleaved U/V 16-bit pairs. Every sample is shifted right by 6 bits.
/// Output layout: luma plane, then all U samples, then all V samples.
///
/// Short input is tolerated: a truncated luma plane is zero-padded to
/// `w * h * 2` bytes, while chroma is emitted only for the complete U/V pairs
/// actually present (at most `ceil(w/2) * ceil(h/2)`), so the result can be
/// shorter than a full frame.
pub fn deinterleave_p016_to_yuv420p10le(p016_bytes: &[u8], w: usize, h: usize) -> Vec<u8> {
    // 16-bit container -> 10-bit value, re-encoded little-endian.
    let to_10bit = |lo: u8, hi: u8| (u16::from_le_bytes([lo, hi]) >> 6).to_le_bytes();

    let luma_len = w * h * 2;
    let chroma_pairs = w.div_ceil(2) * h.div_ceil(2);

    // Everything up to `luma_len` is luma; whatever follows is interleaved chroma.
    let boundary = luma_len.min(p016_bytes.len());
    let (luma_src, chroma_src) = p016_bytes.split_at(boundary);

    let mut planar = Vec::with_capacity(luma_len + chroma_pairs * 4);
    planar.extend(
        luma_src
            .chunks_exact(2)
            .flat_map(|sample| to_10bit(sample[0], sample[1])),
    );
    if planar.len() < luma_len {
        planar.resize(luma_len, 0);
    }

    // U goes straight into the output; V is staged so it can follow the whole U plane.
    let mut v_plane = Vec::with_capacity(chroma_pairs * 2);
    for quad in chroma_src.chunks_exact(4).take(chroma_pairs) {
        planar.extend_from_slice(&to_10bit(quad[0], quad[1]));
        v_plane.extend_from_slice(&to_10bit(quad[2], quad[3]));
    }
    planar.extend_from_slice(&v_plane);
    planar
}
/// One decoded picture copied back to host memory.
#[derive(Clone)]
struct DecodedFrame {
// Semi-planar pixel data: luma rows followed by interleaved chroma rows.
// Despite the name, it holds 16-bit samples when `bit_depth_minus8 > 0`.
nv12: Vec<u8>,
width: u32,
height: u32,
// 0 for 8-bit content; any larger value selects the 16-bit conversion path.
bit_depth_minus8: u8,
color_space: ColorSpace,
// Timestamp reported by the parser for this picture.
timestamp: u64,
}
/// Queue that `display_callback` appends decoded frames to, in callback order.
struct FrameCollector {
frames: VecDeque<DecodedFrame>,
}
/// State shared with the parser callbacks through the `user_data` pointer.
///
/// `new_with_pts` boxes one instance and passes its address to the parser; the
/// callbacks cast `user_data` back to `&mut CallbackState`.
struct CallbackState {
// Entry points the callbacks need; copied out of the loaded libraries.
cuvid_create_decoder: FnCuvidCreateDecoder,
cuvid_get_decoder_caps: FnCuvidGetDecoderCaps,
cuvid_decode_picture: FnCuvidDecodePicture,
cuvid_map_video_frame: FnCuvidMapVideoFrame,
cuvid_unmap_video_frame: FnCuvidUnmapVideoFrame,
cu_memcpy2d: FnCuMemcpy2D,
// Created lazily by `sequence_callback`; `None` until the first sequence header.
decoder: Option<CUvideodecoder>,
// Destination for frames produced by `display_callback`.
collector: Arc<Mutex<FrameCollector>>,
// Coded dimensions; overwritten from the stream when the decoder is created.
width: u32,
height: u32,
// Codec the parser was created for; checked against each sequence header.
codec_type: c_int,
bit_depth_luma_minus8: u8,
color_space: ColorSpace,
// Colour signalling captured from the most recent sequence header.
vui_colour_primaries: u8,
vui_transfer_characteristics: u8,
vui_matrix_coefficients: u8,
vui_full_range_flag: bool,
// First error message reported by any callback (later ones are dropped).
error: Option<String>,
// First typed rejection, when the failure has one.
typed_error: Option<NvdecError>,
}
// SAFETY: NOTE(review): `CallbackState` holds a raw decoder handle, which is why
// `Send` is not derived automatically. This impl asserts that moving the state
// to another thread is sound — presumably because the handle is only used by
// one thread at a time; confirm against how the streaming decoder uses it.
unsafe impl Send for CallbackState {}
impl CallbackState {
    /// Records `msg` as the callback error unless one is already stored.
    ///
    /// The first error wins; `msg` is only converted when it is actually kept.
    fn set_error(&mut self, msg: impl Into<String>) {
        if self.error.is_some() {
            return;
        }
        self.error = Some(msg.into());
    }

    /// Records a typed rejection and mirrors its text into the plain error slot.
    ///
    /// Both slots keep their first value independently of each other.
    fn set_typed_error(&mut self, err: NvdecError) {
        let message = err.to_string();
        if self.typed_error.is_none() {
            self.typed_error = Some(err);
        }
        self.set_error(message);
    }
}
/// Guard for a pushed CUDA context: dropping it calls the stored pop function.
struct CtxScope {
// `cuCtxPopCurrent` entry point invoked from `Drop`.
pop: FnCuCtxPopCurrent,
}
impl CtxScope {
    /// Pushes `ctx` with `push` and returns a guard that will call `pop` on drop.
    ///
    /// # Errors
    /// Fails with "cuCtxPushCurrent failed" when `push` returns a non-zero
    /// status; no guard is created in that case, so nothing is popped.
    ///
    /// # Safety
    /// `push` and `pop` must be valid driver entry points with the declared
    /// signatures, and `ctx` must be a context handle `push` accepts.
    unsafe fn push(
        ctx: CUcontext,
        push: FnCuCtxPushCurrent,
        pop: FnCuCtxPopCurrent,
    ) -> Result<Self> {
        let status = unsafe { push(ctx) };
        if status == 0 {
            Ok(Self { pop })
        } else {
            bail!("cuCtxPushCurrent failed")
        }
    }
}
impl Drop for CtxScope {
    /// Pops the current context; the popped handle and the status are discarded.
    fn drop(&mut self) {
        let mut discarded: CUcontext = ptr::null_mut();
        // Best effort: there is nothing useful to do with a failure inside `drop`.
        let _ = unsafe { (self.pop)(&mut discarded) };
    }
}
/// Eager-mode decoder: all frames are decoded up front and then handed out one
/// at a time by `decode_next`.
pub struct NvdecDecoder {
info: StreamInfo,
// Frames in the order the display callback produced them.
decoded_frames: Vec<DecodedFrame>,
// Index of the next frame `decode_next` will return.
frame_cursor: usize,
// Library handles owned by the decoder so they stay loaded for its lifetime.
_cuvid_lib: libloading::Library,
_cuda_lib: libloading::Library,
}
impl NvdecDecoder {
    /// Builds a streaming decoder for `info` on GPU `gpu_index`.
    ///
    /// This constructor never fails. When `NvdecStreamingDecoder::try_new`
    /// errors, the error is logged and stored in an `NvdecInitErrorDecoder`,
    /// which is returned instead.
    #[allow(clippy::new_ret_no_self)]
    pub fn new(info: StreamInfo, gpu_index: u32) -> Box<dyn Decoder> {
        match NvdecStreamingDecoder::try_new(info.clone(), gpu_index) {
            Ok(d) => Box::new(d),
            Err(e) => {
                tracing::warn!(error = %e, "NvdecStreamingDecoder init failed; first push will return the error");
                Box::new(NvdecInitErrorDecoder {
                    info,
                    error: Some(e),
                })
            }
        }
    }

    /// Decodes every sample eagerly and returns a decoder that replays the frames.
    ///
    /// `samples_with_pts` holds `(bitstream bytes, timestamp)` pairs; empty
    /// samples are skipped. The colour fields of the returned stream info are
    /// replaced with what the sequence callback observed.
    ///
    /// # Errors
    /// * the CUDA or cuvid library, or any required symbol, cannot be loaded;
    /// * `info.codec` has no cuvid mapping;
    /// * CUDA initialisation, device lookup, context or parser creation fails;
    /// * no frame was produced — a typed [`NvdecError`] is returned when a
    ///   callback recorded one, otherwise a plain message.
    #[allow(clippy::new_ret_no_self)]
    pub fn new_with_pts(
        samples_with_pts: Vec<(Vec<u8>, u64)>,
        info: StreamInfo,
        gpu_index: u32,
    ) -> Result<Box<dyn Decoder>> {
        let cuda_lib = unsafe { libloading::Library::new("libcuda.so") }
            .or_else(|_| unsafe { libloading::Library::new("libcuda.so.1") })
            .or_else(|_| unsafe { libloading::Library::new("nvcuda.dll") })
            .context("loading CUDA driver — is the NVIDIA driver installed?")?;
        let cuvid_lib = unsafe { libloading::Library::new("libnvcuvid.so") }
            .or_else(|_| unsafe { libloading::Library::new("libnvcuvid.so.1") })
            .or_else(|_| unsafe { libloading::Library::new("nvcuvid.dll") })
            .context("loading cuvid — is the NVIDIA driver installed?")?;
        // Build the message lazily: it is only needed for unknown codecs.
        let cuvid_codec = codec_to_cuvid(&info.codec)
            .with_context(|| format!("unsupported NVDEC codec: {}", info.codec))?;

        let (
            decoded_frames,
            cb_color_space,
            cb_colour_primaries,
            cb_transfer,
            cb_matrix_coefficients,
            cb_full_range,
        ) = unsafe {
            let cu_init: libloading::Symbol<FnCuInit> = cuda_lib.get(b"cuInit")?;
            if cu_init(0) != 0 {
                bail!("cuInit failed");
            }
            let cu_device_get: libloading::Symbol<FnCuDeviceGet> = cuda_lib.get(b"cuDeviceGet")?;
            let mut device: CUdevice = 0;
            if cu_device_get(&mut device, gpu_index as c_int) != 0 {
                bail!("cuDeviceGet failed for GPU {gpu_index}");
            }

            // Resolve every entry point BEFORE creating the context, so a
            // missing symbol cannot leave a live context behind.
            let cu_ctx_create: libloading::Symbol<FnCuCtxCreate> =
                cuda_lib.get(b"cuCtxCreate_v2")?;
            let cu_ctx_destroy: libloading::Symbol<FnCuCtxDestroy> =
                cuda_lib.get(b"cuCtxDestroy_v2")?;
            let cu_ctx_push: libloading::Symbol<FnCuCtxPushCurrent> =
                cuda_lib.get(b"cuCtxPushCurrent_v2")?;
            let cu_ctx_pop: libloading::Symbol<FnCuCtxPopCurrent> =
                cuda_lib.get(b"cuCtxPopCurrent_v2")?;
            let cuvid_create_parser: libloading::Symbol<FnCuvidCreateVideoParser> =
                cuvid_lib.get(b"cuvidCreateVideoParser")?;
            let cuvid_parse_data: libloading::Symbol<FnCuvidParseVideoData> =
                cuvid_lib.get(b"cuvidParseVideoData")?;
            let cuvid_destroy_parser: libloading::Symbol<FnCuvidDestroyVideoParser> =
                cuvid_lib.get(b"cuvidDestroyVideoParser")?;
            let cuvid_create_decoder: libloading::Symbol<FnCuvidCreateDecoder> =
                cuvid_lib.get(b"cuvidCreateDecoder")?;
            let cuvid_get_decoder_caps: libloading::Symbol<FnCuvidGetDecoderCaps> =
                cuvid_lib.get(b"cuvidGetDecoderCaps")?;
            let cuvid_destroy_decoder: libloading::Symbol<FnCuvidDestroyDecoder> =
                cuvid_lib.get(b"cuvidDestroyDecoder")?;
            let cuvid_decode_picture: libloading::Symbol<FnCuvidDecodePicture> =
                cuvid_lib.get(b"cuvidDecodePicture")?;
            let cuvid_map_video_frame: libloading::Symbol<FnCuvidMapVideoFrame> = cuvid_lib
                .get(b"cuvidMapVideoFrame64")
                .or_else(|_| cuvid_lib.get(b"cuvidMapVideoFrame"))?;
            let cuvid_unmap_video_frame: libloading::Symbol<FnCuvidUnmapVideoFrame> = cuvid_lib
                .get(b"cuvidUnmapVideoFrame64")
                .or_else(|_| cuvid_lib.get(b"cuvidUnmapVideoFrame"))?;
            let cu_memcpy2d: libloading::Symbol<FnCuMemcpy2D> = cuda_lib.get(b"cuMemcpy2D_v2")?;

            let mut ctx: CUcontext = ptr::null_mut();
            if cu_ctx_create(&mut ctx, 0, device) != 0 {
                bail!("cuCtxCreate failed");
            }
            // From here on, every exit path must destroy `ctx`.

            let collector = Arc::new(Mutex::new(FrameCollector {
                frames: VecDeque::new(),
            }));
            // Boxed so the address handed to the parser stays stable.
            let mut state = Box::new(CallbackState {
                cuvid_create_decoder: *cuvid_create_decoder,
                cuvid_get_decoder_caps: *cuvid_get_decoder_caps,
                cuvid_decode_picture: *cuvid_decode_picture,
                cuvid_map_video_frame: *cuvid_map_video_frame,
                cuvid_unmap_video_frame: *cuvid_unmap_video_frame,
                cu_memcpy2d: *cu_memcpy2d,
                decoder: None,
                collector: Arc::clone(&collector),
                width: info.width,
                height: info.height,
                codec_type: cuvid_codec,
                bit_depth_luma_minus8: 0,
                color_space: ColorSpace::Bt709,
                vui_colour_primaries: 1,
                vui_transfer_characteristics: 1,
                vui_matrix_coefficients: 1,
                vui_full_range_flag: false,
                error: None,
                typed_error: None,
            });
            let state_ptr: *mut c_void = (&mut *state) as *mut CallbackState as *mut c_void;

            let mut parser_params: CuVideoParserParams = std::mem::zeroed();
            parser_params.codec_type = cuvid_codec;
            parser_params.max_num_decode_surfaces = 20;
            parser_params.clock_rate = 0;
            parser_params.error_threshold = 100;
            parser_params.max_display_delay = 4;
            parser_params.reserved1[0] = 1;
            parser_params.user_data = state_ptr;
            parser_params.pfn_sequence_callback = Some(sequence_callback);
            parser_params.pfn_decode_picture = Some(decode_callback);
            parser_params.pfn_display_picture = Some(display_callback);
            parser_params.pfn_get_operating_point = Some(get_operating_point_callback);

            let mut parser: CUvideoparser = ptr::null_mut();
            let create_rc = cuvid_create_parser(&mut parser, &mut parser_params);
            if create_rc != 0 {
                cu_ctx_destroy(ctx);
                bail!("cuvidCreateVideoParser failed: {create_rc}");
            }

            // Run the parse inside a closure so that cleanup below happens on
            // both the success and the error path.
            let parse_result: Result<()> = (|| {
                let _scope = CtxScope::push(ctx, *cu_ctx_push, *cu_ctx_pop)?;
                for (idx, (sample, pts)) in samples_with_pts.iter().enumerate() {
                    if sample.is_empty() {
                        continue;
                    }
                    let mut packet: CuVideoSourceDataPacket = std::mem::zeroed();
                    packet.payload_size = sample.len() as c_ulong;
                    packet.payload = sample.as_ptr();
                    packet.timestamp = *pts as c_ulonglong;
                    packet.flags = CUVID_PKT_TIMESTAMP;
                    let rc = cuvid_parse_data(parser, &mut packet);
                    if rc != 0 && idx == 0 {
                        tracing::warn!(
                            rc = rc,
                            "cuvidParseVideoData returned non-zero at first sample"
                        );
                    }
                    // Callbacks cannot return rich errors; they park them in `state`.
                    if let Some(e) = &state.error {
                        tracing::warn!(error = %e, "NVDEC callback reported failure");
                        break;
                    }
                }
                // Flush: an end-of-stream packet makes the parser emit pending frames.
                let mut eos_packet: CuVideoSourceDataPacket = std::mem::zeroed();
                eos_packet.flags = CUVID_PKT_ENDOFSTREAM;
                cuvid_parse_data(parser, &mut eos_packet);
                Ok(())
            })();

            cuvid_destroy_parser(parser);
            if let Some(dec) = state.decoder.take() {
                cuvid_destroy_decoder(dec);
            }
            cu_ctx_destroy(ctx);
            parse_result?;

            let cb_error = state.error.take();
            let cb_typed_error = state.typed_error.take();
            let cb_colour_primaries = state.vui_colour_primaries;
            let cb_transfer = state.vui_transfer_characteristics;
            let cb_matrix_coefficients = state.vui_matrix_coefficients;
            let cb_full_range = state.vui_full_range_flag;
            let cb_color_space = state.color_space;
            drop(state);

            // Move the frames out instead of cloning every pixel buffer. A
            // poisoned mutex still holds valid frames, so recover its contents.
            let frames_vec: Vec<DecodedFrame> = {
                let mut collected = collector
                    .lock()
                    .unwrap_or_else(std::sync::PoisonError::into_inner);
                Vec::from(std::mem::take(&mut collected.frames))
            };
            tracing::info!(
                codec = cuvid_codec,
                gpu = gpu_index,
                frames = frames_vec.len(),
                "NVDEC decode complete"
            );
            if frames_vec.is_empty() {
                if let Some(te) = cb_typed_error {
                    return Err(anyhow::Error::new(te));
                }
                if let Some(e) = cb_error {
                    bail!("NVDEC produced no frames: {e}");
                }
                bail!("NVDEC produced no frames");
            }
            (
                frames_vec,
                cb_color_space,
                cb_colour_primaries,
                cb_transfer,
                cb_matrix_coefficients,
                cb_full_range,
            )
        };

        // Override the container-level colour description with what the
        // bitstream signalled; HDR side data is carried over unchanged.
        let mut info = info;
        info.color_space = cb_color_space;
        info.color_metadata = ColorMetadata {
            transfer: TransferFn::from_h273(cb_transfer),
            matrix_coefficients: cb_matrix_coefficients,
            colour_primaries: cb_colour_primaries,
            full_range: cb_full_range,
            mastering_display: info.color_metadata.mastering_display,
            content_light_level: info.color_metadata.content_light_level,
        };
        Ok(Box::new(NvdecDecoder {
            info,
            decoded_frames,
            frame_cursor: 0,
            _cuvid_lib: cuvid_lib,
            _cuda_lib: cuda_lib,
        }))
    }
}
/// Parser callback invoked with the stream format from a sequence header.
///
/// Validates the format (codec match, chroma/bit depth, GPU capability
/// limits), captures the colour signalling into the shared state and, on the
/// first call, creates the hardware decoder.
///
/// Returns 0 on any rejection or failure (the reason is stored in the
/// [`CallbackState`]) and the chosen decode-surface count otherwise. Panics
/// are caught and reported as 0 so they never unwind into the C caller.
///
/// # Safety
/// `user_data` must be null or point to a live `CallbackState`, and `format`
/// must be null or point to a readable `CuVideoFormat`.
unsafe extern "C" fn sequence_callback(
    user_data: *mut c_void,
    format: *mut CuVideoFormat,
) -> c_int {
    unsafe {
        catch_unwind(AssertUnwindSafe(|| {
            if user_data.is_null() || format.is_null() {
                return 0;
            }
            let state = &mut *(user_data as *mut CallbackState);
            let fmt = &*format;
            if fmt.codec != state.codec_type {
                tracing::warn!(
                    expected = state.codec_type,
                    got = fmt.codec,
                    "NVDEC sequence_callback codec mismatch — ABI drift suspected"
                );
                state.set_error(format!(
                    "sequence_callback codec mismatch: expected {} got {}",
                    state.codec_type, fmt.codec
                ));
                return 0;
            }
            let num_surfaces = (fmt.min_num_decode_surfaces as c_uint).clamp(20, 32) as c_ulong;
            // Widen before adding: `u8 + 8` would overflow (and panic in debug
            // builds) on a corrupt header before validation gets to reject it.
            let bit_depth = u32::from(fmt.bit_depth_luma_minus8) + 8;
            tracing::info!(
                codec = fmt.codec,
                width = fmt.coded_width,
                height = fmt.coded_height,
                chroma = fmt.chroma_format,
                bit_depth = bit_depth,
                surfaces = num_surfaces,
                "NVDEC backend engaged"
            );
            if let Some(err) = validate_format(
                fmt.chroma_format,
                fmt.bit_depth_luma_minus8,
                fmt.coded_width,
                fmt.coded_height,
            ) {
                match &err {
                    NvdecError::UnsupportedChroma { label, .. } => {
                        tracing::warn!(
                            codec = state.codec_type,
                            w = fmt.coded_width,
                            h = fmt.coded_height,
                            chroma = fmt.chroma_format,
                            chroma_label = *label,
                            "NVDEC rejecting: chroma {} unsupported",
                            label
                        );
                    }
                    NvdecError::UnsupportedPixelFormat { bit_depth } => {
                        tracing::warn!(
                            codec = state.codec_type,
                            w = fmt.coded_width,
                            h = fmt.coded_height,
                            bit_depth = bit_depth,
                            "NVDEC rejecting: {}-bit content unsupported",
                            bit_depth
                        );
                    }
                    NvdecError::UnsupportedByHardware { reason } => {
                        tracing::warn!(codec = state.codec_type, "NVDEC rejecting: {reason}");
                    }
                }
                state.set_typed_error(err);
                return 0;
            }
            {
                // Ask the driver whether this GPU can decode the stream at all.
                // A failing query is not fatal: decoder creation decides then.
                let mut caps: CuVideoDecodeCaps = std::mem::zeroed();
                caps.codec_type = state.codec_type;
                caps.chroma_format = fmt.chroma_format;
                caps.bit_depth_minus8 = fmt.bit_depth_luma_minus8 as u32;
                if (state.cuvid_get_decoder_caps)(&mut caps) == 0 {
                    if caps.is_supported == 0 {
                        let reason = format!(
                            "GPU NVDEC does not support codec={} chroma={} {}-bit",
                            state.codec_type, fmt.chroma_format, bit_depth
                        );
                        tracing::warn!(
                            codec = state.codec_type,
                            chroma = fmt.chroma_format,
                            bit_depth = bit_depth,
                            "NVDEC rejecting: {reason}"
                        );
                        state.set_typed_error(NvdecError::UnsupportedByHardware { reason });
                        return 0;
                    }
                    // A reported maximum of 0 is treated as "no limit given".
                    if caps.max_width > 0
                        && caps.max_height > 0
                        && (fmt.coded_width > caps.max_width || fmt.coded_height > caps.max_height)
                    {
                        let reason = format!(
                            "frame {}x{} exceeds NVDEC max {}x{}",
                            fmt.coded_width, fmt.coded_height, caps.max_width, caps.max_height
                        );
                        tracing::warn!(
                            w = fmt.coded_width,
                            h = fmt.coded_height,
                            max_w = caps.max_width,
                            max_h = caps.max_height,
                            "NVDEC rejecting: {reason}"
                        );
                        state.set_typed_error(NvdecError::UnsupportedByHardware { reason });
                        return 0;
                    }
                    tracing::debug!(
                        codec = state.codec_type,
                        max_w = caps.max_width,
                        max_h = caps.max_height,
                        "NVDEC capability validated"
                    );
                }
            }
            let is_high_bit_depth = fmt.bit_depth_luma_minus8 > 0;
            state.bit_depth_luma_minus8 = fmt.bit_depth_luma_minus8;
            // Byte 0 carries the full-range flag in bit 3; bytes 1..=3 carry
            // primaries, transfer characteristics and matrix coefficients.
            let cp = fmt.video_signal_description[1];
            let tc = fmt.video_signal_description[2];
            let mc = fmt.video_signal_description[3];
            let full_range = (fmt.video_signal_description[0] >> 3) & 1 == 1;
            state.vui_colour_primaries = cp;
            state.vui_transfer_characteristics = tc;
            state.vui_matrix_coefficients = mc;
            state.vui_full_range_flag = full_range;
            state.color_space = match mc {
                1 => ColorSpace::Bt709,
                5 | 6 => ColorSpace::Bt601,
                9 | 10 => ColorSpace::Bt2020,
                // Unspecified matrix: guess from the bit depth.
                _ => {
                    if is_high_bit_depth {
                        ColorSpace::Bt2020
                    } else {
                        ColorSpace::Bt709
                    }
                }
            };
            tracing::info!(
                matrix_coefficients = mc,
                color_primaries = fmt.video_signal_description[1],
                transfer = fmt.video_signal_description[2],
                color_space = ?state.color_space,
                "NVDEC color metadata"
            );
            // The decoder is created once; later sequence headers reuse it.
            if state.decoder.is_none() {
                let mut create_info: CuVideoDecodeCreateInfo = std::mem::zeroed();
                create_info.code_width = fmt.coded_width as c_ulong;
                create_info.coded_height = fmt.coded_height as c_ulong;
                create_info.num_decode_surfaces = num_surfaces;
                create_info.codec_type = state.codec_type;
                create_info.chroma_format = CUVID_CHROMA_420;
                create_info.creation_flags = CUVID_CREATE_PREFER_CUVID;
                create_info.bit_depth_minus8 = fmt.bit_depth_luma_minus8 as c_ulong;
                create_info.output_format = if is_high_bit_depth {
                    CUVID_FMT_P016
                } else {
                    CUVID_FMT_NV12
                };
                // Progressive streams use mode 0; interlaced H.264 uses mode 2
                // and every other interlaced codec mode 1.
                create_info.deinterlace_mode = if fmt.progressive_sequence != 0 {
                    0
                } else if state.codec_type == CUVID_H264 {
                    2
                } else {
                    1
                };
                create_info.target_width = fmt.coded_width as c_ulong;
                create_info.target_height = fmt.coded_height as c_ulong;
                create_info.num_output_surfaces = 4;
                create_info.max_width = 0;
                create_info.max_height = 0;
                // `display_callback` sizes its host buffers from these.
                state.width = fmt.coded_width;
                state.height = fmt.coded_height;
                let mut decoder: CUvideodecoder = ptr::null_mut();
                let rc = (state.cuvid_create_decoder)(&mut decoder, &mut create_info);
                if rc != 0 {
                    state.set_error(format!("cuvidCreateDecoder failed: {rc}"));
                    return 0;
                }
                state.decoder = Some(decoder);
            }
            num_surfaces as c_int
        }))
        .unwrap_or(0)
    }
}
/// Parser callback that submits one picture to the hardware decoder.
///
/// Returns 1 when `cuvidDecodePicture` succeeds. Returns 0 when a pointer is
/// null, when no decoder exists yet, when the decode call fails (the last two
/// also record an error in the shared state), or when a panic was caught.
///
/// # Safety
/// `user_data` must be null or point to a live `CallbackState`; `pic_params`
/// is forwarded to the driver unchanged.
unsafe extern "C" fn decode_callback(
    user_data: *mut c_void,
    pic_params: *mut CuVideoPicParams,
) -> c_int {
    let submit = AssertUnwindSafe(|| -> c_int {
        if user_data.is_null() || pic_params.is_null() {
            return 0;
        }
        let state = unsafe { &mut *(user_data as *mut CallbackState) };
        match state.decoder {
            None => {
                state.set_error("decode_callback before decoder created");
                0
            }
            Some(decoder) => {
                let rc = unsafe { (state.cuvid_decode_picture)(decoder, pic_params) };
                if rc == 0 {
                    1
                } else {
                    state.set_error(format!("cuvidDecodePicture failed: {rc}"));
                    0
                }
            }
        }
    });
    // Never let a panic unwind across the FFI boundary.
    catch_unwind(submit).unwrap_or(0)
}
/// Parser callback that copies one displayable picture to host memory.
///
/// Maps the decoded surface, copies the luma plane and the interleaved chroma
/// plane into a tightly packed host buffer, unmaps the surface and appends a
/// [`DecodedFrame`] to the collector.
///
/// Host buffer layout: `height` luma rows of `width * bytes_per_sample` bytes,
/// then `ceil(height / 2)` chroma rows of `ceil(width / 2)` U/V pairs. This is
/// exactly the layout `decoded_frame_to_video_frame` consumes, including for
/// odd dimensions.
///
/// Returns 1 on success and 0 on any failure (recorded in the shared state) or
/// caught panic.
///
/// # Safety
/// `user_data` must be null or point to a live `CallbackState`, and
/// `disp_info` must be null or point to a readable `CuVideoDispInfo`.
unsafe extern "C" fn display_callback(
    user_data: *mut c_void,
    disp_info: *mut CuVideoDispInfo,
) -> c_int {
    unsafe {
        catch_unwind(AssertUnwindSafe(|| {
            if user_data.is_null() || disp_info.is_null() {
                return 0;
            }
            let state = &mut *(user_data as *mut CallbackState);
            let info = &*disp_info;
            let Some(decoder) = state.decoder else {
                state.set_error("display_callback before decoder created");
                return 0;
            };
            if info.picture_index < 0 {
                state.set_error(format!(
                    "display_callback picture_index invalid: {}",
                    info.picture_index
                ));
                return 0;
            }
            let mut proc_params: CuVideoProcParams = std::mem::zeroed();
            proc_params.progressive_frame = info.progressive_frame;
            proc_params.second_field = 0;
            proc_params.top_field_first = info.top_field_first;
            proc_params.unpaired_field = 0;
            let mut frame_ptr: CUdeviceptr = 0;
            let mut pitch: c_uint = 0;
            let rc = (state.cuvid_map_video_frame)(
                decoder,
                info.picture_index,
                &mut frame_ptr,
                &mut pitch,
                &mut proc_params,
            );
            if rc != 0 {
                state.set_error(format!("cuvidMapVideoFrame failed: {rc}"));
                return 0;
            }
            let width = state.width as usize;
            let height = state.height as usize;
            let bytes_per_sample: usize = if state.bit_depth_luma_minus8 > 0 {
                2
            } else {
                1
            };
            let row_bytes = width * bytes_per_sample;
            // A chroma row holds ceil(width / 2) U/V pairs. Using `row_bytes`
            // here would cut the last pair in half for odd widths and shift
            // every following row.
            let chroma_row_bytes = width.div_ceil(2) * 2 * bytes_per_sample;
            let chroma_height = height.div_ceil(2);
            let y_bytes = row_bytes * height;
            let uv_bytes = chroma_row_bytes * chroma_height;
            let mut host_buf = vec![0u8; y_bytes + uv_bytes];

            let mut luma_copy: CudaMemcpy2D = std::mem::zeroed();
            luma_copy.src_memory_type = CU_MEMORYTYPE_DEVICE;
            luma_copy.src_device = frame_ptr;
            luma_copy.src_pitch = pitch as usize;
            luma_copy.dst_memory_type = CU_MEMORYTYPE_HOST;
            luma_copy.dst_host = host_buf.as_mut_ptr() as *mut c_void;
            luma_copy.dst_pitch = row_bytes;
            luma_copy.width_in_bytes = row_bytes;
            luma_copy.height = height;
            let rc = (state.cu_memcpy2d)(&luma_copy);
            if rc != 0 {
                (state.cuvid_unmap_video_frame)(decoder, frame_ptr);
                state.set_error(format!("cuMemcpy2D (luma) failed: {rc}"));
                return 0;
            }

            // The chroma plane starts after the luma rows of the output
            // surface, whose row count is rounded up to an even number.
            let surface_rows = (height + 1) & !1;
            let chroma_src = frame_ptr + (pitch as CUdeviceptr) * (surface_rows as CUdeviceptr);
            let mut chroma_copy: CudaMemcpy2D = std::mem::zeroed();
            chroma_copy.src_memory_type = CU_MEMORYTYPE_DEVICE;
            chroma_copy.src_device = chroma_src;
            chroma_copy.src_pitch = pitch as usize;
            chroma_copy.dst_memory_type = CU_MEMORYTYPE_HOST;
            chroma_copy.dst_host = host_buf[y_bytes..].as_mut_ptr() as *mut c_void;
            chroma_copy.dst_pitch = chroma_row_bytes;
            chroma_copy.width_in_bytes = chroma_row_bytes;
            chroma_copy.height = chroma_height;
            let rc = (state.cu_memcpy2d)(&chroma_copy);
            // Unmap before checking the result so the surface is released on
            // both paths.
            let _ = (state.cuvid_unmap_video_frame)(decoder, frame_ptr);
            if rc != 0 {
                state.set_error(format!("cuMemcpy2D (chroma) failed: {rc}"));
                return 0;
            }

            // A poisoned mutex still guards a usable queue; recover it rather
            // than silently dropping the frame while reporting success.
            let mut collected = state
                .collector
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            collected.frames.push_back(DecodedFrame {
                nv12: host_buf,
                width: state.width,
                height: state.height,
                bit_depth_minus8: state.bit_depth_luma_minus8,
                color_space: state.color_space,
                timestamp: info.timestamp,
            });
            1
        }))
        .unwrap_or(0)
    }
}
/// Parser callback for operating-point selection; ignores both arguments and
/// always answers 0.
///
/// NOTE(review): presumably 0 selects the default operating point — confirm
/// against the parser's callback contract.
///
/// # Safety
/// Neither pointer is dereferenced, so any values are acceptable.
unsafe extern "C" fn get_operating_point_callback(
    _user_data: *mut c_void,
    _op_info: *mut c_void,
) -> c_int {
    let answer = AssertUnwindSafe(|| -> c_int { 0 });
    match catch_unwind(answer) {
        Ok(point) => point,
        Err(_) => 0,
    }
}
/// Placeholder shape for the operating-point record passed to
/// `get_operating_point_callback`.
///
/// The callback takes the record as `*mut c_void` and never reads it, so this
/// type is only referenced by the size assertion below.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoOperatingPointInfo {
codec: c_int,
reserved: [u8; 1024],
}
const _: () = assert!(std::mem::size_of::<CuVideoOperatingPointInfo>() <= 1024 + 8);
impl NvdecDecoder {
    /// Test-only constructor that wraps pre-decoded frames without touching a GPU.
    ///
    /// Each tuple is `(pixel bytes, width, height, bit_depth_minus8, timestamp)`;
    /// every frame is tagged as BT.709.
    ///
    /// # Panics
    /// Panics when none of the placeholder system libraries can be loaded. The
    /// struct needs two library handles, so any loadable system library stands
    /// in for the CUDA and cuvid ones.
    #[doc(hidden)]
    pub fn test_new_from_frames(
        frames: Vec<(Vec<u8>, u32, u32, u8, u64)>,
        info: StreamInfo,
    ) -> Box<dyn Decoder> {
        // Tries one well-known library per platform until one loads.
        fn placeholder_library() -> libloading::Library {
            unsafe { libloading::Library::new("kernel32.dll") }
                .or_else(|_| unsafe { libloading::Library::new("libc.so.6") })
                .or_else(|_| unsafe { libloading::Library::new("/usr/lib/libSystem.B.dylib") })
                .expect("test harness: a placeholder system library must load")
        }

        let mut decoded_frames: Vec<DecodedFrame> = Vec::with_capacity(frames.len());
        for (bytes, w, h, bd, pts) in frames {
            decoded_frames.push(DecodedFrame {
                nv12: bytes,
                width: w,
                height: h,
                bit_depth_minus8: bd,
                color_space: ColorSpace::Bt709,
                timestamp: pts,
            });
        }

        let cuda_lib = placeholder_library();
        let cuvid_lib = placeholder_library();
        Box::new(NvdecDecoder {
            info,
            decoded_frames,
            frame_cursor: 0,
            _cuvid_lib: cuvid_lib,
            _cuda_lib: cuda_lib,
        })
    }
}
impl Decoder for NvdecDecoder {
fn stream_info(&self) -> &StreamInfo {
&self.info
}
fn push_sample(&mut self, _data: &[u8]) -> Result<()> {
anyhow::bail!(
"NvdecDecoder: push_sample on eager-mode instance — use NvdecPushDecoder for streaming"
);
}
fn finish(&mut self) -> Result<()> {
Ok(())
}
fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
if self.frame_cursor >= self.decoded_frames.len() {
return Ok(None);
}
let frame = &self.decoded_frames[self.frame_cursor];
self.frame_cursor += 1;
Ok(Some(decoded_frame_to_video_frame(frame)))
}
}
/// Converts a decoded semi-planar frame into a planar `VideoFrame`.
///
/// * `bit_depth_minus8 > 0`: the buffer is handed to
///   `deinterleave_p016_to_yuv420p10le` and tagged `Yuv420p10le`.
/// * otherwise: the luma plane is copied as-is and the interleaved chroma
///   bytes that follow it are split into a U plane followed by a V plane,
///   tagged `Yuv420p`.
///
/// A source buffer shorter than expected is tolerated: only the bytes that are
/// actually present are copied, so the output may be shorter than a full frame.
fn decoded_frame_to_video_frame(frame: &DecodedFrame) -> VideoFrame {
    let width = frame.width as usize;
    let height = frame.height as usize;
    // 4:2:0 subsampling: each chroma plane is half-size in both axes, rounded up.
    let chroma_samples = width.div_ceil(2) * height.div_ceil(2);

    let (planar, pixel_format) = if frame.bit_depth_minus8 > 0 {
        (
            deinterleave_p016_to_yuv420p10le(&frame.nv12, width, height),
            PixelFormat::Yuv420p10le,
        )
    } else {
        let luma_len = width * height;
        let mut planar = Vec::with_capacity(luma_len + chroma_samples * 2);

        let luma_end = luma_len.min(frame.nv12.len());
        planar.extend_from_slice(&frame.nv12[..luma_end]);

        // Everything after the luma plane is interleaved U/V byte pairs.
        let interleaved: &[u8] = frame.nv12.get(luma_len..).unwrap_or(&[]);
        let pairs = || interleaved.chunks_exact(2).take(chroma_samples);
        planar.extend(pairs().map(|uv| uv[0]));
        planar.extend(pairs().map(|uv| uv[1]));

        (planar, PixelFormat::Yuv420p)
    };

    VideoFrame::new(
        Bytes::from(planar),
        frame.width,
        frame.height,
        pixel_format,
        frame.color_space,
        frame.timestamp,
    )
}
/// Decoder that buffers every pushed sample and only decodes when `finish`
/// is called, by handing the whole batch to `NvdecDecoder::new_with_pts`.
pub struct NvdecPushDecoder {
// Stream description returned from `stream_info` and cloned into the inner decoder.
info: StreamInfo,
// GPU index forwarded to `NvdecDecoder::new_with_pts`.
gpu_index: u32,
// Buffered `(sample bytes, pts)` pairs, in push order.
pending_samples: Vec<(Vec<u8>, u64)>,
// Inner decoder; `None` until `finish` has built it successfully.
decoded: Option<Box<dyn Decoder>>,
// Set by `finish`; further pushes are rejected once true.
finished: bool,
}
impl NvdecPushDecoder {
    /// Creates a decoder with no buffered samples for the given stream and GPU.
    pub fn new(info: StreamInfo, gpu_index: u32) -> Self {
        let pending_samples = Vec::new();
        Self {
            info,
            gpu_index,
            pending_samples,
            decoded: None,
            finished: false,
        }
    }

    /// Buffers a copy of `data` together with its presentation timestamp.
    ///
    /// # Errors
    ///
    /// Fails if `finish` has already been called.
    pub fn push_sample_with_pts(&mut self, data: &[u8], pts: u64) -> Result<()> {
        anyhow::ensure!(
            !self.finished,
            "NvdecPushDecoder: push_sample after finish"
        );
        let sample = (data.to_vec(), pts);
        self.pending_samples.push(sample);
        Ok(())
    }
}
impl Decoder for NvdecPushDecoder {
    /// Returns the stream description supplied at construction.
    fn stream_info(&self) -> &StreamInfo {
        &self.info
    }

    /// Buffers `data`, using the number of samples buffered so far as its
    /// timestamp.
    ///
    /// # Errors
    ///
    /// Fails if `finish` has already been called.
    fn push_sample(&mut self, data: &[u8]) -> Result<()> {
        let pts = self.pending_samples.len() as u64;
        self.push_sample_with_pts(data, pts)
    }

    /// Decodes everything buffered so far by building the inner decoder.
    /// Calling it again after the first call is a no-op.
    ///
    /// # Errors
    ///
    /// Propagates any failure from `NvdecDecoder::new_with_pts`. The decoder
    /// is still marked finished and the buffered samples are consumed.
    fn finish(&mut self) -> Result<()> {
        if !self.finished {
            self.finished = true;
            let samples = std::mem::take(&mut self.pending_samples);
            let inner = NvdecDecoder::new_with_pts(samples, self.info.clone(), self.gpu_index)?;
            self.decoded = Some(inner);
        }
        Ok(())
    }

    /// Forwards to the inner decoder; yields `Ok(None)` while no inner decoder
    /// exists (before `finish`, or after a failed `finish`).
    fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
        let Some(inner) = self.decoded.as_mut() else {
            return Ok(None);
        };
        inner.decode_next()
    }
}
/// CUDA context handle bundled with the driver/cuvid entry points the
/// streaming decoder needs after construction (context push/pop/destroy,
/// parser and decoder teardown, and packet parsing).
struct NvdecCtx {
// Context created in `NvdecStreamingDecoder::try_new`; destroyed in its `Drop`.
cu_ctx: CUcontext,
// Function pointers copied out of the loaded CUDA driver library.
cu_ctx_destroy: FnCuCtxDestroy,
cu_ctx_push: FnCuCtxPushCurrent,
cu_ctx_pop: FnCuCtxPopCurrent,
// Function pointers copied out of the loaded cuvid library.
cuvid_destroy_parser: FnCuvidDestroyVideoParser,
cuvid_destroy_decoder: FnCuvidDestroyDecoder,
cuvid_parse_data: FnCuvidParseVideoData,
}
// SAFETY (NOTE(review)): `cu_ctx` is a raw pointer, which is why `Send` is not
// derived automatically. This assumes the context may be used from another
// thread as long as it is pushed there first — confirm against the CUDA
// driver's threading rules.
unsafe impl Send for NvdecCtx {}
/// Incremental NVDEC decoder: each pushed sample is fed straight to the cuvid
/// parser, and frames produced by the callbacks are queued in `collector`
/// until `decode_next` drains them.
pub struct NvdecStreamingDecoder {
// Stream description; colour fields are refreshed from `state` in `finish`.
info: StreamInfo,
// Callback state; boxed because a raw pointer to it is handed to the parser
// as `user_data` in `try_new`.
state: Box<CallbackState>,
// Clone of `state.collector`: the queue of decoded frames.
collector: Arc<Mutex<FrameCollector>>,
// Parser handle created in `try_new`, destroyed in `Drop`.
parser: CUvideoparser,
// CUDA context plus the entry points used after construction.
ctx: NvdecCtx,
// Set once `finish` runs or a typed error is returned from a push.
finished: bool,
// Timestamp source for `push_sample` calls that carry no explicit pts.
sample_counter: u64,
// Library handles stored alongside the function pointers copied from them
// (presumably to keep the libraries loaded while those pointers are in use).
_cuvid_lib: libloading::Library,
_cuda_lib: libloading::Library,
}
// SAFETY (NOTE(review)): the struct holds raw handles (`parser`, the context
// inside `ctx`, and whatever `CallbackState` contains), so `Send` is asserted
// manually. This assumes the handles are only used through `&mut self` with the
// context pushed first — confirm.
unsafe impl Send for NvdecStreamingDecoder {}
impl NvdecStreamingDecoder {
fn try_new(info: StreamInfo, gpu_index: u32) -> Result<Self> {
let _init_guard = crate::cuda_lock::lock_for_cuda_init();
let cuda_lib = unsafe { libloading::Library::new("libcuda.so") }
.or_else(|_| unsafe { libloading::Library::new("libcuda.so.1") })
.or_else(|_| unsafe { libloading::Library::new("nvcuda.dll") })
.context("loading CUDA driver — is the NVIDIA driver installed?")?;
let cuvid_lib = unsafe { libloading::Library::new("libnvcuvid.so") }
.or_else(|_| unsafe { libloading::Library::new("libnvcuvid.so.1") })
.or_else(|_| unsafe { libloading::Library::new("nvcuvid.dll") })
.context("loading cuvid — is the NVIDIA driver installed?")?;
let cuvid_codec = codec_to_cuvid(&info.codec)
.context(format!("unsupported NVDEC codec: {}", info.codec))?;
let (state, parser, ctx) = unsafe {
let cu_init: libloading::Symbol<FnCuInit> = cuda_lib.get(b"cuInit")?;
if cu_init(0) != 0 {
bail!("cuInit failed");
}
let cu_device_get: libloading::Symbol<FnCuDeviceGet> = cuda_lib.get(b"cuDeviceGet")?;
let mut device: CUdevice = 0;
if cu_device_get(&mut device, gpu_index as c_int) != 0 {
bail!("cuDeviceGet failed for GPU {gpu_index}");
}
let cu_ctx_create: libloading::Symbol<FnCuCtxCreate> =
cuda_lib.get(b"cuCtxCreate_v2")?;
let cu_ctx_destroy: libloading::Symbol<FnCuCtxDestroy> =
cuda_lib.get(b"cuCtxDestroy_v2")?;
let cu_ctx_push: libloading::Symbol<FnCuCtxPushCurrent> =
cuda_lib.get(b"cuCtxPushCurrent_v2")?;
let cu_ctx_pop: libloading::Symbol<FnCuCtxPopCurrent> =
cuda_lib.get(b"cuCtxPopCurrent_v2")?;
let mut cu_ctx: CUcontext = ptr::null_mut();
if cu_ctx_create(&mut cu_ctx, 0, device) != 0 {
bail!("cuCtxCreate failed");
}
let cuvid_create_parser: libloading::Symbol<FnCuvidCreateVideoParser> =
cuvid_lib.get(b"cuvidCreateVideoParser")?;
let cuvid_parse_data: libloading::Symbol<FnCuvidParseVideoData> =
cuvid_lib.get(b"cuvidParseVideoData")?;
let cuvid_destroy_parser: libloading::Symbol<FnCuvidDestroyVideoParser> =
cuvid_lib.get(b"cuvidDestroyVideoParser")?;
let cuvid_create_decoder: libloading::Symbol<FnCuvidCreateDecoder> =
cuvid_lib.get(b"cuvidCreateDecoder")?;
let cuvid_get_decoder_caps: libloading::Symbol<FnCuvidGetDecoderCaps> =
cuvid_lib.get(b"cuvidGetDecoderCaps")?;
let cuvid_destroy_decoder: libloading::Symbol<FnCuvidDestroyDecoder> =
cuvid_lib.get(b"cuvidDestroyDecoder")?;
let cuvid_decode_picture: libloading::Symbol<FnCuvidDecodePicture> =
cuvid_lib.get(b"cuvidDecodePicture")?;
let cuvid_map_video_frame: libloading::Symbol<FnCuvidMapVideoFrame> = cuvid_lib
.get(b"cuvidMapVideoFrame64")
.or_else(|_| cuvid_lib.get(b"cuvidMapVideoFrame"))?;
let cuvid_unmap_video_frame: libloading::Symbol<FnCuvidUnmapVideoFrame> = cuvid_lib
.get(b"cuvidUnmapVideoFrame64")
.or_else(|_| cuvid_lib.get(b"cuvidUnmapVideoFrame"))?;
let cu_memcpy2d: libloading::Symbol<FnCuMemcpy2D> = cuda_lib.get(b"cuMemcpy2D_v2")?;
let collector = Arc::new(Mutex::new(FrameCollector {
frames: VecDeque::new(),
}));
let mut state = Box::new(CallbackState {
cuvid_create_decoder: *cuvid_create_decoder,
cuvid_get_decoder_caps: *cuvid_get_decoder_caps,
cuvid_decode_picture: *cuvid_decode_picture,
cuvid_map_video_frame: *cuvid_map_video_frame,
cuvid_unmap_video_frame: *cuvid_unmap_video_frame,
cu_memcpy2d: *cu_memcpy2d,
decoder: None,
collector: Arc::clone(&collector),
width: info.width,
height: info.height,
codec_type: cuvid_codec,
bit_depth_luma_minus8: 0,
color_space: ColorSpace::Bt709,
vui_colour_primaries: 1,
vui_transfer_characteristics: 1,
vui_matrix_coefficients: 1,
vui_full_range_flag: false,
error: None,
typed_error: None,
});
let state_ptr: *mut c_void = (&mut *state) as *mut CallbackState as *mut c_void;
let mut parser_params: CuVideoParserParams = std::mem::zeroed();
parser_params.codec_type = cuvid_codec;
parser_params.max_num_decode_surfaces = 20;
parser_params.clock_rate = 0;
parser_params.error_threshold = 100;
parser_params.max_display_delay = 4;
parser_params.reserved1[0] = 1;
parser_params.user_data = state_ptr;
parser_params.pfn_sequence_callback = Some(sequence_callback);
parser_params.pfn_decode_picture = Some(decode_callback);
parser_params.pfn_display_picture = Some(display_callback);
parser_params.pfn_get_operating_point = Some(get_operating_point_callback);
let mut parser: CUvideoparser = ptr::null_mut();
let create_rc = cuvid_create_parser(&mut parser, &mut parser_params);
if create_rc != 0 {
cu_ctx_destroy(cu_ctx);
bail!("cuvidCreateVideoParser failed: {create_rc}");
}
let ctx = NvdecCtx {
cu_ctx,
cu_ctx_destroy: *cu_ctx_destroy,
cu_ctx_push: *cu_ctx_push,
cu_ctx_pop: *cu_ctx_pop,
cuvid_destroy_parser: *cuvid_destroy_parser,
cuvid_destroy_decoder: *cuvid_destroy_decoder,
cuvid_parse_data: *cuvid_parse_data,
};
(state, parser, ctx)
};
let collector = Arc::clone(&state.collector);
Ok(Self {
info,
state,
collector,
parser,
ctx,
finished: false,
sample_counter: 0,
_cuvid_lib: cuvid_lib,
_cuda_lib: cuda_lib,
})
}
pub fn push_sample_with_pts(&mut self, data: &[u8], pts: u64) -> Result<()> {
if self.finished {
anyhow::bail!("NvdecStreamingDecoder: push_sample after finish");
}
if data.is_empty() {
return Ok(());
}
unsafe {
let _scope = CtxScope::push(self.ctx.cu_ctx, self.ctx.cu_ctx_push, self.ctx.cu_ctx_pop)
.context("push CUDA context for incremental parse")?;
let mut packet: CuVideoSourceDataPacket = std::mem::zeroed();
packet.payload_size = data.len() as c_ulong;
packet.payload = data.as_ptr();
packet.timestamp = pts as c_ulonglong;
packet.flags = CUVID_PKT_TIMESTAMP;
let rc = (self.ctx.cuvid_parse_data)(self.parser, &mut packet);
if rc != 0 {
if self.state.error.is_none() {
tracing::warn!(
rc = rc,
"cuvidParseVideoData returned non-zero (incremental)"
);
}
}
}
if let Some(te) = self.state.typed_error.take() {
self.finished = true;
return Err(anyhow::Error::new(te));
}
Ok(())
}
}
impl Drop for NvdecStreamingDecoder {
    /// Tears down the parser, the decoder (if one was created) and finally the
    /// CUDA context.
    ///
    /// The context is pushed for the duration of the teardown. If the push
    /// fails, teardown is still attempted on a best-effort basis, but the
    /// matching pop is skipped: popping without a successful push would remove
    /// a context this decoder never put on the thread's stack.
    fn drop(&mut self) {
        // SAFETY (NOTE(review)): `parser` and `cu_ctx` were produced by
        // `try_new` and are released exactly once here. Assumes the copied
        // function pointers are still valid because the library fields are
        // dropped only after this method returns — confirm.
        unsafe {
            let push_rc = (self.ctx.cu_ctx_push)(self.ctx.cu_ctx);
            let pushed = push_rc == 0;
            if !pushed {
                tracing::warn!(rc = push_rc, "Drop: cuCtxPushCurrent failed");
            }

            (self.ctx.cuvid_destroy_parser)(self.parser);
            if let Some(dec) = self.state.decoder.take() {
                (self.ctx.cuvid_destroy_decoder)(dec);
            }

            // Only undo a push that actually happened.
            if pushed {
                let mut popped: CUcontext = ptr::null_mut();
                (self.ctx.cu_ctx_pop)(&mut popped);
            }
            (self.ctx.cu_ctx_destroy)(self.ctx.cu_ctx);
        }
    }
}
impl Decoder for NvdecStreamingDecoder {
fn stream_info(&self) -> &StreamInfo {
&self.info
}
fn push_sample(&mut self, data: &[u8]) -> Result<()> {
let pts = self.sample_counter;
self.sample_counter += 1;
self.push_sample_with_pts(data, pts)
}
fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
}
self.finished = true;
unsafe {
let _scope = CtxScope::push(self.ctx.cu_ctx, self.ctx.cu_ctx_push, self.ctx.cu_ctx_pop)
.context("push CUDA context for EOS flush")?;
let mut eos_packet: CuVideoSourceDataPacket = std::mem::zeroed();
eos_packet.flags = CUVID_PKT_ENDOFSTREAM;
(self.ctx.cuvid_parse_data)(self.parser, &mut eos_packet);
}
self.info.color_space = self.state.color_space;
self.info.color_metadata = ColorMetadata {
transfer: TransferFn::from_h273(self.state.vui_transfer_characteristics),
matrix_coefficients: self.state.vui_matrix_coefficients,
colour_primaries: self.state.vui_colour_primaries,
full_range: self.state.vui_full_range_flag,
mastering_display: self.info.color_metadata.mastering_display,
content_light_level: self.info.color_metadata.content_light_level,
};
if let Some(te) = self.state.typed_error.take() {
return Err(anyhow::Error::new(te));
}
Ok(())
}
fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
if let Some(te) = self.state.typed_error.take() {
return Err(anyhow::Error::new(te));
}
let mut guard = self.collector.lock().unwrap();
match guard.frames.pop_front() {
Some(frame) => Ok(Some(decoded_frame_to_video_frame(&frame))),
None => Ok(None),
}
}
}
/// Stand-in decoder that carries an error and reports it from the first
/// `Decoder` method called on it.
struct NvdecInitErrorDecoder {
// Stream description returned from `stream_info`.
info: StreamInfo,
// The pending error; taken (left as `None`) by whichever call reports it.
error: Option<anyhow::Error>,
}
impl Decoder for NvdecInitErrorDecoder {
    /// Returns the stream description supplied at construction.
    fn stream_info(&self) -> &StreamInfo {
        &self.info
    }

    /// Reports the stored error if it has not been reported yet; otherwise
    /// accepts and discards the sample.
    fn push_sample(&mut self, _data: &[u8]) -> Result<()> {
        self.error.take().map_or(Ok(()), Err)
    }

    /// Reports the stored error if it has not been reported yet; otherwise
    /// succeeds.
    fn finish(&mut self) -> Result<()> {
        match self.error.take() {
            Some(pending) => Err(pending),
            None => Ok(()),
        }
    }

    /// Reports the stored error if it has not been reported yet; otherwise
    /// signals that there are no frames.
    fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
        self.error.take().map_or(Ok(None), Err)
    }
}