lumen-engine-ffmpeg 0.2.2

FFmpeg integration for media decode, encode, muxing, and GPU interop in Lumen.
Documentation
#![cfg(target_os = "linux")]

use std::{ffi::CStr, mem::MaybeUninit, sync::Arc};

use cudarc::driver::{CudaContext as CudaContextInner, result, sys};

use super::{CudaExternalMemoryHandle, CudaVideoFrame, interop, kernel};
pub use interop::ImportedCudaExternalImage;

/// Identifying information for a single CUDA device, as assembled by
/// `CudaDriver::devices`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CudaDeviceInfo {
    /// Zero-based index of the device in the driver's enumeration order.
    pub ordinal: i32,
    /// Device name as reported by the driver.
    pub name: String,
    /// The 16 UUID bytes reported by the driver, reinterpreted as unsigned bytes.
    pub uuid: [u8; 16],
    /// PCI bus identifier string produced by `interop::device_pci_bus_id`; the
    /// exact textual format is defined by that helper.
    pub pci_bus_id: String,
}

/// Zero-sized handle through which the CUDA driver API wrappers in this module
/// are reached.
///
/// The type carries no state of its own. Because it has no fields it is
/// automatically `Send` and `Sync`, so no `unsafe impl` is needed; should a
/// non-thread-safe field ever be added, the compiler will report the loss of
/// those traits instead of an explicit `unsafe impl` silently hiding it.
pub struct CudaDriver;

impl CudaDriver {
    /// Initialises the CUDA driver API and returns a driver handle.
    ///
    /// # Errors
    ///
    /// Returns the driver's error, rendered as a string, when initialisation
    /// fails.
    pub fn load() -> Result<Self, String> {
        match result::init() {
            Ok(()) => Ok(Self),
            Err(failure) => Err(failure.to_string()),
        }
    }

    /// Creates a [`CudaContext`] for device ordinal 0.
    ///
    /// Shorthand for calling `create_primary_context_for_ordinal` with `0`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `create_primary_context_for_ordinal`.
    pub fn create_primary_context(&self) -> Result<CudaContext<'_>, String> {
        const DEFAULT_ORDINAL: i32 = 0;
        self.create_primary_context_for_ordinal(DEFAULT_ORDINAL)
    }

    /// Creates a [`CudaContext`] for the device at `ordinal`.
    ///
    /// Negative ordinals are not rejected: they are clamped to `0`, so they
    /// select the first device.
    // NOTE(review): clamping rather than returning an error looks deliberate,
    // but it can hide a caller bug — confirm this is the intended contract.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when the context cannot
    /// be created.
    pub fn create_primary_context_for_ordinal(
        &self,
        ordinal: i32,
    ) -> Result<CudaContext<'_>, String> {
        let device_index = if ordinal < 0 { 0 } else { ordinal as usize };
        match CudaContextInner::new(device_index) {
            Ok(inner) => Ok(CudaContext {
                _driver: self,
                inner,
            }),
            Err(failure) => Err(failure.to_string()),
        }
    }

    /// Queries the driver version number through `cuDriverGetVersion`.
    ///
    /// The driver is (re-)initialised first, so this also works on a handle
    /// that was constructed without going through `load`.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when initialisation or
    /// the version query fails.
    pub fn driver_version(&self) -> Result<i32, String> {
        if let Err(failure) = result::init() {
            return Err(failure.to_string());
        }
        let mut version: i32 = 0;
        // SAFETY: `version` is a live, writable `i32` for the whole call.
        let status = unsafe { sys::cuDriverGetVersion(&mut version) };
        match status.result() {
            Ok(()) => Ok(version),
            Err(failure) => Err(failure.to_string()),
        }
    }

    /// Enumerates every device the driver reports, in ordinal order.
    ///
    /// For each ordinal in `0..count` the device handle, name, UUID and PCI bus
    /// id are queried and gathered into a [`CudaDeviceInfo`].
    ///
    /// # Errors
    ///
    /// Stops at the first failing query and returns its error as a string; no
    /// partial list is returned.
    pub fn devices(&self) -> Result<Vec<CudaDeviceInfo>, String> {
        result::init().map_err(|failure| failure.to_string())?;
        let device_count =
            result::device::get_count().map_err(|failure| failure.to_string())?;
        (0..device_count)
            .map(|ordinal| -> Result<CudaDeviceInfo, String> {
                let handle =
                    result::device::get(ordinal).map_err(|failure| failure.to_string())?;
                let name =
                    result::device::get_name(handle).map_err(|failure| failure.to_string())?;
                let raw_uuid =
                    result::device::get_uuid(handle).map_err(|failure| failure.to_string())?;
                let pci_bus_id = interop::device_pci_bus_id(handle)?;
                Ok(CudaDeviceInfo {
                    ordinal,
                    name,
                    // The driver exposes the UUID as C chars; store them as raw bytes.
                    uuid: raw_uuid.bytes.map(|byte| byte as u8),
                    pci_bus_id,
                })
            })
            .collect()
    }

    pub fn describe_error(&self, result: sys::CUresult) -> String {
        let mut name = std::ptr::null();
        let mut description = std::ptr::null();
        let name = if unsafe { sys::cuGetErrorName(result, &mut name) }
            .result()
            .is_ok()
            && !name.is_null()
        {
            unsafe { CStr::from_ptr(name) }
                .to_string_lossy()
                .into_owned()
        } else {
            "UNKNOWN_CUDA_ERROR".to_string()
        };
        let description = if unsafe { sys::cuGetErrorString(result, &mut description) }
            .result()
            .is_ok()
            && !description.is_null()
        {
            unsafe { CStr::from_ptr(description) }
                .to_string_lossy()
                .into_owned()
        } else {
            "no CUDA error description available".to_string()
        };
        format!("{name} ({result:?}): {description}")
    }

    /// Allocates pitched device memory for a `width` x `height` frame at four
    /// bytes per pixel.
    ///
    /// The allocation is made with `cuMemAllocPitch_v2` using an element size
    /// of 16 bytes; the row pitch chosen by the driver is stored in the
    /// returned [`CudaDeviceAllocation`], which frees the memory when dropped.
    // NOTE(review): no context is bound here; presumably the caller must have
    // made one current on this thread beforehand — confirm.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when the allocation
    /// fails.
    pub fn allocate_rgba_frame(
        &self,
        width: u32,
        height: u32,
    ) -> Result<CudaDeviceAllocation<'_>, String> {
        const BYTES_PER_PIXEL: usize = 4;

        let row_bytes = BYTES_PER_PIXEL * width as usize;
        let mut raw_ptr = MaybeUninit::<sys::CUdeviceptr>::uninit();
        let mut raw_pitch = MaybeUninit::<usize>::uninit();

        // SAFETY: both out-pointers refer to live locals that outlive the call.
        let status = unsafe {
            sys::cuMemAllocPitch_v2(
                raw_ptr.as_mut_ptr(),
                raw_pitch.as_mut_ptr(),
                row_bytes,
                height as usize,
                16,
            )
        };
        if let Err(failure) = status.result() {
            return Err(failure.to_string());
        }

        // SAFETY: the call reported success, which is taken to mean the driver
        // has written both output values.
        let (device_ptr, pitch) = unsafe { (raw_ptr.assume_init(), raw_pitch.assume_init()) };
        Ok(CudaDeviceAllocation {
            _driver: self,
            device_ptr,
            width,
            height,
            pitch,
        })
    }

    /// Copies an imported external image into a pitched RGBA allocation.
    ///
    /// Delegates to `interop::copy_image_to_rgba_frame`, passing the
    /// destination's device pointer, pitch and dimensions.
    ///
    /// # Errors
    ///
    /// Propagates the error string returned by the interop helper.
    pub fn copy_image_to_rgba_frame(
        &self,
        source: &ImportedCudaExternalImage,
        destination: &CudaDeviceAllocation<'_>,
    ) -> Result<(), String> {
        let (target_ptr, target_pitch) = (destination.device_ptr, destination.pitch);
        let (width, height) = (destination.width, destination.height);
        interop::copy_image_to_rgba_frame(source, target_ptr, target_pitch, width, height)
    }

    /// Copies a pitched RGBA allocation into an imported external image.
    ///
    /// Delegates to `interop::copy_rgba_frame_to_image`, passing the source's
    /// device pointer, pitch and dimensions.
    ///
    /// # Errors
    ///
    /// Propagates the error string returned by the interop helper.
    pub fn copy_rgba_frame_to_image(
        &self,
        source: &CudaDeviceAllocation<'_>,
        destination: &ImportedCudaExternalImage,
    ) -> Result<(), String> {
        let (origin_ptr, origin_pitch) = (source.device_ptr, source.pitch);
        let (width, height) = (source.width, source.height);
        interop::copy_rgba_frame_to_image(origin_ptr, origin_pitch, destination, width, height)
    }

    /// Calls `cuCtxSynchronize` and reports its status.
    // NOTE(review): this acts on whatever context is current on the calling
    // thread; nothing here binds one — confirm callers do so first.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when the call fails.
    pub fn synchronize_context(&self) -> Result<(), String> {
        let status = unsafe { sys::cuCtxSynchronize() };
        match status.result() {
            Ok(()) => Ok(()),
            Err(failure) => Err(failure.to_string()),
        }
    }

    pub fn create_nv12_to_rgba_converter(
        &self,
        context: &CudaContext<'_>,
    ) -> Result<CudaNv12ToRgbaConverter<'_>, String> {
        let converter = kernel::CudaNv12ToRgbaConverter::new(context.inner.clone())?;
        Ok(CudaNv12ToRgbaConverter {
            _driver: self,
            context: context.inner.clone(),
            converter,
        })
    }
}

/// A CUDA context handle whose lifetime is tied to the [`CudaDriver`] that
/// created it.
pub struct CudaContext<'a> {
    /// Borrow of the originating driver; kept only to constrain the lifetime.
    _driver: &'a CudaDriver,
    /// Shared handle to the underlying `cudarc` context.
    inner: Arc<CudaContextInner>,
}

impl CudaContext<'_> {
    /// Returns the ordinal of the device this context belongs to, converted to
    /// `i32`.
    pub fn ordinal(&self) -> i32 {
        let device_index = self.inner.ordinal();
        device_index as i32
    }

    /// Binds this context to the calling thread.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when binding fails.
    pub fn set_current(&self) -> Result<(), String> {
        match self.inner.bind_to_thread() {
            Ok(()) => Ok(()),
            Err(failure) => Err(failure.to_string()),
        }
    }
}

/// Owning handle to a pitched device-memory frame.
///
/// The memory is released with `cuMemFree_v2` when the value is dropped.
pub struct CudaDeviceAllocation<'a> {
    /// Borrow of the originating driver; kept only to constrain the lifetime.
    _driver: &'a CudaDriver,
    /// Base device address of the allocation.
    device_ptr: sys::CUdeviceptr,
    /// Frame width that was requested at allocation time.
    width: u32,
    /// Frame height that was requested at allocation time.
    height: u32,
    /// Row pitch in bytes.
    pitch: usize,
}

impl CudaDeviceAllocation<'_> {
    /// Builds a [`CudaVideoFrame`] from this allocation's device pointer,
    /// dimensions and pitch, tagged with the presentation timestamp `pts`.
    // NOTE(review): the returned frame does not borrow from `self`, so nothing
    // in the type system stops it outliving the allocation — confirm callers
    // keep the allocation alive while the frame is in use.
    pub fn as_video_frame(&self, pts: Option<i64>) -> CudaVideoFrame {
        let pitch_bytes = self.pitch as u64;
        CudaVideoFrame::from_device_ptr(self.device_ptr, self.width, self.height, pitch_bytes, pts)
    }

    /// Returns `(width, height)` of the frame.
    pub fn dimensions(&self) -> (u32, u32) {
        let Self { width, height, .. } = self;
        (*width, *height)
    }

    /// Returns the base device address of the allocation.
    pub fn device_ptr(&self) -> sys::CUdeviceptr {
        self.device_ptr
    }

    /// Returns the row pitch in bytes.
    pub fn pitch(&self) -> usize {
        self.pitch
    }

    /// Fills `pitch * height` bytes of the allocation with `value`.
    ///
    /// The byte count is computed with a saturating multiply.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when the memset fails.
    pub fn clear(&self, value: u8) -> Result<(), String> {
        let byte_count = self.pitch.saturating_mul(self.height as usize);
        let status = unsafe { sys::cuMemsetD8_v2(self.device_ptr, value, byte_count) };
        match status.result() {
            Ok(()) => Ok(()),
            Err(failure) => Err(failure.to_string()),
        }
    }
}

impl Drop for CudaDeviceAllocation<'_> {
    /// Frees the device memory. A failure is deliberately ignored because
    /// `drop` has no way to report it.
    fn drop(&mut self) {
        let status = unsafe { sys::cuMemFree_v2(self.device_ptr) };
        let _ignored = status.result();
    }
}

/// NV12-to-RGBA converter bound to a specific CUDA context and tied to the
/// lifetime of the [`CudaDriver`] that created it.
pub struct CudaNv12ToRgbaConverter<'a> {
    /// Borrow of the originating driver; kept only to constrain the lifetime.
    _driver: &'a CudaDriver,
    /// Context that is bound to the calling thread before each conversion.
    context: Arc<CudaContextInner>,
    /// Kernel-level converter that performs the actual work.
    converter: kernel::CudaNv12ToRgbaConverter,
}

impl CudaNv12ToRgbaConverter<'_> {
    /// Converts the decoded frame `source` into the RGBA allocation
    /// `destination`.
    ///
    /// The stored context is bound to the calling thread first, then the work
    /// is handed to the kernel-level converter.
    ///
    /// # Errors
    ///
    /// Returns the driver error, rendered as a string, when the context cannot
    /// be bound, or the error reported by the kernel-level converter.
    pub fn convert(
        &self,
        source: &super::CudaDecodedFrame,
        destination: &CudaDeviceAllocation<'_>,
    ) -> Result<(), String> {
        // A failed bind must not be ignored: the conversion would otherwise run
        // against whatever context happens to be current on this thread.
        self.context
            .bind_to_thread()
            .map_err(|error| error.to_string())?;
        self.converter.convert(source, destination)
    }
}

/// Imports a Vulkan image, exported as an opaque file descriptor, as CUDA
/// external memory.
///
/// This forwards `memory`, `allocation_size`, `width` and `height` unchanged to
/// `interop::import_vulkan_opaque_fd_image`. `_driver` is not used in the body.
// NOTE(review): the driver reference presumably serves as evidence that the
// driver has been loaded — confirm.
///
/// # Errors
///
/// Propagates the error string returned by the interop helper.
pub fn import_vulkan_opaque_fd_image(
    _driver: &CudaDriver,
    memory: CudaExternalMemoryHandle,
    allocation_size: u64,
    width: u32,
    height: u32,
) -> Result<ImportedCudaExternalImage, String> {
    interop::import_vulkan_opaque_fd_image(memory, allocation_size, width, height)
}

/// Imports a Vulkan image as CUDA external memory from an owned file
/// descriptor.
///
/// Ownership of `fd` is passed to `interop::import_owned_vulkan_opaque_fd_image`
/// together with `allocation_size`, `width` and `height`. `_driver` is not used
/// in the body.
// NOTE(review): the driver reference presumably serves as evidence that the
// driver has been loaded — confirm.
///
/// # Errors
///
/// Propagates the error string returned by the interop helper.
pub fn import_owned_vulkan_opaque_fd_image(
    _driver: &CudaDriver,
    fd: std::os::fd::OwnedFd,
    allocation_size: u64,
    width: u32,
    height: u32,
) -> Result<ImportedCudaExternalImage, String> {
    interop::import_owned_vulkan_opaque_fd_image(fd, allocation_size, width, height)
}