use anyhow::{Context, Result, bail};
use std::collections::VecDeque;
use std::ffi::c_void;
use std::os::raw::{c_int, c_ulong, c_ulonglong};
use std::ptr;
use std::sync::{Arc, Mutex};
use crate::decode::Decoder;
use crate::frame::{ColorMetadata, ColorSpace, StreamInfo, TransferFn, VideoFrame};
use super::callbacks::{
decode_callback, display_callback, get_operating_point_callback, sequence_callback,
};
use super::convert::{codec_to_cuvid, decoded_frame_to_video_frame};
use super::ffi::{
CUcontext, CUdevice,
CuVideoParserParams, CuVideoSourceDataPacket, CUvideoparser,
FnCuCtxCreate, FnCuCtxDestroy, FnCuCtxPopCurrent, FnCuCtxPushCurrent, FnCuDeviceGet,
FnCuInit, FnCuMemcpy2D,
FnCuvidCreateDecoder, FnCuvidCreateVideoParser, FnCuvidDecodePicture,
FnCuvidDestroyDecoder, FnCuvidDestroyVideoParser, FnCuvidGetDecoderCaps,
FnCuvidMapVideoFrame, FnCuvidParseVideoData, FnCuvidUnmapVideoFrame,
CUVID_PKT_ENDOFSTREAM, CUVID_PKT_TIMESTAMP,
};
use super::state::{CallbackState, CtxScope, FrameCollector};
/// CUDA context handle plus the driver / cuvid entry points the decoder needs
/// after construction (per-call context push/pop, parsing, and teardown).
///
/// The function pointers are copied out of `libloading` symbols in
/// `NvdecStreamingDecoder::try_new`, so they are only callable while the
/// libraries they came from remain loaded.
pub(super) struct NvdecCtx {
/// Context created by `cuCtxCreate_v2` in `try_new`.
pub(super) cu_ctx: CUcontext,
/// `cuCtxDestroy_v2`.
pub(super) cu_ctx_destroy: FnCuCtxDestroy,
/// `cuCtxPushCurrent_v2`.
pub(super) cu_ctx_push: FnCuCtxPushCurrent,
/// `cuCtxPopCurrent_v2`.
pub(super) cu_ctx_pop: FnCuCtxPopCurrent,
/// `cuvidDestroyVideoParser`.
pub(super) cuvid_destroy_parser: FnCuvidDestroyVideoParser,
/// `cuvidDestroyDecoder`.
pub(super) cuvid_destroy_decoder: FnCuvidDestroyDecoder,
/// `cuvidParseVideoData`.
pub(super) cuvid_parse_data: FnCuvidParseVideoData,
}
// SAFETY: `NvdecCtx` holds a raw `CUcontext` and plain function pointers. The
// code in this file pushes `cu_ctx` onto the calling thread before using it
// rather than relying on thread-local "current context" state.
// NOTE(review): this assumes the driver allows a context handle to be used
// from a thread other than the one that created it — confirm against the CUDA
// driver API documentation.
unsafe impl Send for NvdecCtx {}
/// Streaming decoder that feeds samples to a cuvid video parser and hands out
/// the frames the parser callbacks have queued.
///
/// Field order matters: Rust drops fields in declaration order, so the two
/// `libloading::Library` handles at the end are unloaded only after every
/// other field, and after `Drop::drop` has finished calling into them.
pub struct NvdecStreamingDecoder {
/// Stream description returned by `stream_info`; its colour fields are
/// overwritten in `finish` from values stored in `state`.
pub(super) info: StreamInfo,
/// State shared with the parser callbacks. A raw pointer to this box is given
/// to the parser as `user_data`, so it has to stay boxed for the lifetime of
/// the parser.
pub(super) state: Box<CallbackState>,
/// Frame queue; a clone of the `Arc` stored in `state.collector`.
/// `decode_next` pops from its front.
pub(super) collector: Arc<Mutex<FrameCollector>>,
/// Parser handle returned by `cuvidCreateVideoParser`.
pub(super) parser: CUvideoparser,
/// CUDA context and the entry points needed after construction.
pub(super) ctx: NvdecCtx,
/// Set once `finish` has run, or when `push_sample_with_pts` returns a typed
/// error; later pushes are rejected.
pub(super) finished: bool,
/// Counter that `push_sample` uses as the timestamp of each sample.
pub(super) sample_counter: u64,
/// Keeps the cuvid library loaded while its function pointers are in use.
pub(super) _cuvid_lib: libloading::Library,
/// Keeps the CUDA driver library loaded while its function pointers are in use.
pub(super) _cuda_lib: libloading::Library,
}
// SAFETY: the decoder owns raw parser/context handles and a boxed callback
// state that the parser references through a raw pointer. Every method in this
// file that calls into cuvid takes `&mut self` and pushes the CUDA context on
// the calling thread first.
// NOTE(review): this assumes the parser callbacks only run inside
// `cuvidParseVideoData` on the calling thread — confirm against the callback
// implementations and the NVDEC documentation.
unsafe impl Send for NvdecStreamingDecoder {}
impl NvdecStreamingDecoder {
    /// Builds a streaming NVDEC decoder for `info` on GPU `gpu_index`.
    ///
    /// Loads the CUDA driver and cuvid shared libraries, resolves the entry
    /// points this module uses, creates a CUDA context on the requested device
    /// and creates a cuvid video parser whose callbacks receive a pointer to
    /// the boxed [`CallbackState`].
    ///
    /// # Errors
    /// Fails when either library cannot be loaded, the codec has no cuvid
    /// mapping, a symbol is missing, or any of `cuInit`, `cuDeviceGet`,
    /// `cuCtxCreate` or `cuvidCreateVideoParser` reports a non-zero status.
    pub(super) fn try_new(info: StreamInfo, gpu_index: u32) -> Result<Self> {
        // Held until this function returns, so the whole initialisation
        // sequence runs under the crate's CUDA-init lock.
        let _init_guard = crate::cuda_lock::lock_for_cuda_init();

        // SAFETY (all `Library::new` calls): loading a shared library runs its
        // initialisers; the names tried are the NVIDIA driver libraries.
        let cuda_lib = unsafe { libloading::Library::new("libcuda.so") }
            .or_else(|_| unsafe { libloading::Library::new("libcuda.so.1") })
            .or_else(|_| unsafe { libloading::Library::new("nvcuda.dll") })
            .context("loading CUDA driver — is the NVIDIA driver installed?")?;
        let cuvid_lib = unsafe { libloading::Library::new("libnvcuvid.so") }
            .or_else(|_| unsafe { libloading::Library::new("libnvcuvid.so.1") })
            .or_else(|_| unsafe { libloading::Library::new("nvcuvid.dll") })
            .context("loading cuvid — is the NVIDIA driver installed?")?;

        // `with_context` so the message is only formatted on failure.
        let cuvid_codec = codec_to_cuvid(&info.codec)
            .with_context(|| format!("unsupported NVDEC codec: {}", info.codec))?;

        // SAFETY: each symbol is looked up by name and called through the
        // `Fn*` signature declared in `super::ffi`.
        // NOTE(review): that those signatures match the driver ABI cannot be
        // checked from this file.
        let (state, parser, ctx) = unsafe {
            let cu_init: libloading::Symbol<FnCuInit> = cuda_lib.get(b"cuInit")?;
            if cu_init(0) != 0 {
                bail!("cuInit failed");
            }
            let cu_device_get: libloading::Symbol<FnCuDeviceGet> = cuda_lib.get(b"cuDeviceGet")?;
            let mut device: CUdevice = 0;
            if cu_device_get(&mut device, gpu_index as c_int) != 0 {
                bail!("cuDeviceGet failed for GPU {gpu_index}");
            }

            // Resolve every remaining entry point BEFORE creating the CUDA
            // context: a missing symbol then returns early without leaving a
            // live context behind that nobody would destroy.
            let cu_ctx_create: libloading::Symbol<FnCuCtxCreate> =
                cuda_lib.get(b"cuCtxCreate_v2")?;
            let cu_ctx_destroy: libloading::Symbol<FnCuCtxDestroy> =
                cuda_lib.get(b"cuCtxDestroy_v2")?;
            let cu_ctx_push: libloading::Symbol<FnCuCtxPushCurrent> =
                cuda_lib.get(b"cuCtxPushCurrent_v2")?;
            let cu_ctx_pop: libloading::Symbol<FnCuCtxPopCurrent> =
                cuda_lib.get(b"cuCtxPopCurrent_v2")?;
            let cuvid_create_parser: libloading::Symbol<FnCuvidCreateVideoParser> =
                cuvid_lib.get(b"cuvidCreateVideoParser")?;
            let cuvid_parse_data: libloading::Symbol<FnCuvidParseVideoData> =
                cuvid_lib.get(b"cuvidParseVideoData")?;
            let cuvid_destroy_parser: libloading::Symbol<FnCuvidDestroyVideoParser> =
                cuvid_lib.get(b"cuvidDestroyVideoParser")?;
            let cuvid_create_decoder: libloading::Symbol<FnCuvidCreateDecoder> =
                cuvid_lib.get(b"cuvidCreateDecoder")?;
            let cuvid_get_decoder_caps: libloading::Symbol<FnCuvidGetDecoderCaps> =
                cuvid_lib.get(b"cuvidGetDecoderCaps")?;
            let cuvid_destroy_decoder: libloading::Symbol<FnCuvidDestroyDecoder> =
                cuvid_lib.get(b"cuvidDestroyDecoder")?;
            let cuvid_decode_picture: libloading::Symbol<FnCuvidDecodePicture> =
                cuvid_lib.get(b"cuvidDecodePicture")?;
            // Prefer the `64`-suffixed map/unmap entry points and fall back to
            // the unsuffixed names when they are absent.
            let cuvid_map_video_frame: libloading::Symbol<FnCuvidMapVideoFrame> = cuvid_lib
                .get(b"cuvidMapVideoFrame64")
                .or_else(|_| cuvid_lib.get(b"cuvidMapVideoFrame"))?;
            let cuvid_unmap_video_frame: libloading::Symbol<FnCuvidUnmapVideoFrame> = cuvid_lib
                .get(b"cuvidUnmapVideoFrame64")
                .or_else(|_| cuvid_lib.get(b"cuvidUnmapVideoFrame"))?;
            let cu_memcpy2d: libloading::Symbol<FnCuMemcpy2D> = cuda_lib.get(b"cuMemcpy2D_v2")?;

            // From here on the only fallible step is parser creation, which
            // destroys the context itself on failure.
            // NOTE(review): per the CUDA driver API, cuCtxCreate also makes the
            // new context current on this thread and nothing here pops it;
            // later calls push/pop explicitly. Confirm whether the constructing
            // thread is meant to keep it current.
            let mut cu_ctx: CUcontext = ptr::null_mut();
            if cu_ctx_create(&mut cu_ctx, 0, device) != 0 {
                bail!("cuCtxCreate failed");
            }

            let collector = Arc::new(Mutex::new(FrameCollector {
                frames: VecDeque::new(),
            }));
            let mut state = Box::new(CallbackState {
                cuvid_create_decoder: *cuvid_create_decoder,
                cuvid_get_decoder_caps: *cuvid_get_decoder_caps,
                cuvid_decode_picture: *cuvid_decode_picture,
                cuvid_map_video_frame: *cuvid_map_video_frame,
                cuvid_unmap_video_frame: *cuvid_unmap_video_frame,
                cu_memcpy2d: *cu_memcpy2d,
                decoder: None,
                collector: Arc::clone(&collector),
                width: info.width,
                height: info.height,
                codec_type: cuvid_codec,
                bit_depth_luma_minus8: 0,
                color_space: ColorSpace::Bt709,
                vui_colour_primaries: 1,
                vui_transfer_characteristics: 1,
                vui_matrix_coefficients: 1,
                vui_full_range_flag: false,
                error: None,
                typed_error: None,
            });
            // The parser keeps this pointer as `user_data`. It points into the
            // heap allocation owned by the `Box`, so moving the box into
            // `Self` below does not change the address.
            let state_ptr: *mut c_void = (&mut *state) as *mut CallbackState as *mut c_void;

            let mut parser_params: CuVideoParserParams = std::mem::zeroed();
            parser_params.codec_type = cuvid_codec;
            parser_params.max_num_decode_surfaces = 20;
            parser_params.clock_rate = 0;
            parser_params.error_threshold = 100;
            parser_params.max_display_delay = 4;
            parser_params.reserved1[0] = 1;
            parser_params.user_data = state_ptr;
            parser_params.pfn_sequence_callback = Some(sequence_callback);
            parser_params.pfn_decode_picture = Some(decode_callback);
            parser_params.pfn_display_picture = Some(display_callback);
            parser_params.pfn_get_operating_point = Some(get_operating_point_callback);

            let mut parser: CUvideoparser = ptr::null_mut();
            let create_rc = cuvid_create_parser(&mut parser, &mut parser_params);
            if create_rc != 0 {
                // No `Self` exists yet, so `Drop` will not run: release the
                // context by hand.
                cu_ctx_destroy(cu_ctx);
                bail!("cuvidCreateVideoParser failed: {create_rc}");
            }

            let ctx = NvdecCtx {
                cu_ctx,
                cu_ctx_destroy: *cu_ctx_destroy,
                cu_ctx_push: *cu_ctx_push,
                cu_ctx_pop: *cu_ctx_pop,
                cuvid_destroy_parser: *cuvid_destroy_parser,
                cuvid_destroy_decoder: *cuvid_destroy_decoder,
                cuvid_parse_data: *cuvid_parse_data,
            };
            (state, parser, ctx)
        };

        let collector = Arc::clone(&state.collector);
        Ok(Self {
            info,
            state,
            collector,
            parser,
            ctx,
            finished: false,
            sample_counter: 0,
            _cuvid_lib: cuvid_lib,
            _cuda_lib: cuda_lib,
        })
    }

    /// Feeds one compressed sample to the parser, tagged with timestamp `pts`.
    ///
    /// An empty `data` slice is accepted and ignored. A non-zero status from
    /// `cuvidParseVideoData` is only logged (and only when `state.error` is
    /// unset); it does not fail the call by itself.
    ///
    /// # Errors
    /// Fails when called after the decoder has finished, when the CUDA context
    /// cannot be pushed, or when `state.typed_error` is set once the parse
    /// call has returned. In the last case the decoder is marked finished.
    pub fn push_sample_with_pts(&mut self, data: &[u8], pts: u64) -> Result<()> {
        if self.finished {
            bail!("NvdecStreamingDecoder: push_sample after finish");
        }
        if data.is_empty() {
            return Ok(());
        }
        // SAFETY: `packet.payload` borrows `data`, which outlives the parse
        // call below; `parser` and the function pointers were set up in
        // `try_new` and stay valid while `self` is alive.
        unsafe {
            // Presumably pops the context again when dropped at the end of
            // this block — see `CtxScope`.
            let _scope = CtxScope::push(self.ctx.cu_ctx, self.ctx.cu_ctx_push, self.ctx.cu_ctx_pop)
                .context("push CUDA context for incremental parse")?;
            let mut packet: CuVideoSourceDataPacket = std::mem::zeroed();
            packet.payload_size = data.len() as c_ulong;
            packet.payload = data.as_ptr();
            packet.timestamp = pts as c_ulonglong;
            packet.flags = CUVID_PKT_TIMESTAMP;
            let rc = (self.ctx.cuvid_parse_data)(self.parser, &mut packet);
            if rc != 0 && self.state.error.is_none() {
                tracing::warn!(
                    rc = rc,
                    "cuvidParseVideoData returned non-zero (incremental)"
                );
            }
        }
        if let Some(te) = self.state.typed_error.take() {
            self.finished = true;
            return Err(anyhow::Error::new(te));
        }
        Ok(())
    }
}
impl Drop for NvdecStreamingDecoder {
    /// Destroys the parser, the decoder (if one was created) and finally the
    /// CUDA context, in that order.
    ///
    /// Teardown is best-effort: a failed context push is logged and the
    /// destroy calls are still attempted.
    fn drop(&mut self) {
        // SAFETY: the handles and function pointers were produced in `try_new`
        // and are released exactly once here; the libraries they come from are
        // fields of `self` and are therefore still loaded.
        unsafe {
            let push_rc = (self.ctx.cu_ctx_push)(self.ctx.cu_ctx);
            let pushed = push_rc == 0;
            if !pushed {
                tracing::warn!(rc = push_rc, "Drop: cuCtxPushCurrent failed");
            }
            (self.ctx.cuvid_destroy_parser)(self.parser);
            if let Some(dec) = self.state.decoder.take() {
                (self.ctx.cuvid_destroy_decoder)(dec);
            }
            // Pop only what this function pushed. If the push failed, popping
            // would remove some other context from the calling thread's stack.
            if pushed {
                let mut popped: CUcontext = ptr::null_mut();
                (self.ctx.cu_ctx_pop)(&mut popped);
            }
            (self.ctx.cu_ctx_destroy)(self.ctx.cu_ctx);
        }
    }
}
impl Decoder for NvdecStreamingDecoder {
fn stream_info(&self) -> &StreamInfo {
&self.info
}
fn push_sample(&mut self, data: &[u8]) -> Result<()> {
let pts = self.sample_counter;
self.sample_counter += 1;
self.push_sample_with_pts(data, pts)
}
fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
}
self.finished = true;
unsafe {
let _scope = CtxScope::push(self.ctx.cu_ctx, self.ctx.cu_ctx_push, self.ctx.cu_ctx_pop)
.context("push CUDA context for EOS flush")?;
let mut eos_packet: CuVideoSourceDataPacket = std::mem::zeroed();
eos_packet.flags = CUVID_PKT_ENDOFSTREAM;
(self.ctx.cuvid_parse_data)(self.parser, &mut eos_packet);
}
self.info.color_space = self.state.color_space;
self.info.color_metadata = ColorMetadata {
transfer: TransferFn::from_h273(self.state.vui_transfer_characteristics),
matrix_coefficients: self.state.vui_matrix_coefficients,
colour_primaries: self.state.vui_colour_primaries,
full_range: self.state.vui_full_range_flag,
mastering_display: self.info.color_metadata.mastering_display,
content_light_level: self.info.color_metadata.content_light_level,
};
if let Some(te) = self.state.typed_error.take() {
return Err(anyhow::Error::new(te));
}
Ok(())
}
fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
if let Some(te) = self.state.typed_error.take() {
return Err(anyhow::Error::new(te));
}
let mut guard = self.collector.lock().unwrap();
match guard.frames.pop_front() {
Some(frame) => Ok(Some(decoded_frame_to_video_frame(&frame))),
None => Ok(None),
}
}
}
/// `Decoder` stand-in that carries a stored error instead of a working decoder.
///
/// The first `push_sample`, `finish` or `decode_next` call returns the stored
/// error; once it has been taken, those calls succeed without producing frames.
/// NOTE(review): presumably built when NVDEC initialisation fails — confirm at
/// the construction site.
pub(super) struct NvdecInitErrorDecoder {
/// Stream description returned by `stream_info`.
pub(super) info: StreamInfo,
/// Error still waiting to be reported; `None` once it has been returned.
pub(super) error: Option<anyhow::Error>,
}
impl Decoder for NvdecInitErrorDecoder {
    /// Returns the stream description this placeholder was built with.
    fn stream_info(&self) -> &StreamInfo {
        &self.info
    }

    /// Ignores the sample; returns the stored error if it has not been
    /// reported yet, otherwise `Ok(())`.
    fn push_sample(&mut self, _data: &[u8]) -> Result<()> {
        self.error.take().map_or(Ok(()), Err)
    }

    /// Returns the stored error if it has not been reported yet, otherwise
    /// `Ok(())`.
    fn finish(&mut self) -> Result<()> {
        self.error.take().map_or(Ok(()), Err)
    }

    /// Never yields a frame: returns the stored error if it has not been
    /// reported yet, otherwise `Ok(None)`.
    fn decode_next(&mut self) -> Result<Option<VideoFrame>> {
        self.error.take().map_or(Ok(None), Err)
    }
}