Skip to main content

lumen_ffmpeg/
video.rs

1use std::ptr;
2
3#[cfg(feature = "metal")]
4use std::ptr::NonNull;
5
6use crate::{
7    FfmpegError, Result,
8    ffi::{self, AvFrame, sys},
9    format::{InputContext, Packet, Rational},
10    gpu::{GpuBackend, GpuVideoFrame},
11};
12#[cfg(target_os = "linux")]
13use sys::SWS_BILINEAR;
14#[cfg(not(target_os = "linux"))]
15use sys::SwsFlags::SWS_BILINEAR;
16use sys::{
17    AVCodecID::{
18        AV_CODEC_ID_AV1, AV_CODEC_ID_H264, AV_CODEC_ID_HEVC, AV_CODEC_ID_NONE, AV_CODEC_ID_PRORES,
19        AV_CODEC_ID_VP9,
20    },
21    AVHWDeviceType::{
22        AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, AV_HWDEVICE_TYPE_VULKAN,
23    },
24    AVPixelFormat::{
25        AV_PIX_FMT_BGRA, AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, AV_PIX_FMT_NV12, AV_PIX_FMT_P010BE,
26        AV_PIX_FMT_P010LE, AV_PIX_FMT_RGBA, AV_PIX_FMT_VIDEOTOOLBOX, AV_PIX_FMT_VULKAN,
27    },
28};
29
/// Video codecs that have a dedicated mapping to and from FFmpeg codec ids.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VideoCodec {
    /// Maps to `AV_CODEC_ID_H264`.
    H264,
    /// Maps to `AV_CODEC_ID_HEVC`.
    Hevc,
    /// Maps to `AV_CODEC_ID_AV1`.
    Av1,
    /// Maps to `AV_CODEC_ID_VP9`.
    Vp9,
    /// Maps to `AV_CODEC_ID_PRORES`.
    ProRes,
    /// Any codec id without a dedicated variant; converts back to `AV_CODEC_ID_NONE`.
    Unknown,
}
39
/// Pixel formats that have a dedicated mapping to and from FFmpeg pixel formats.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    /// Maps to `AV_PIX_FMT_CUDA`.
    Cuda,
    /// Maps to `AV_PIX_FMT_RGBA`.
    Rgba8,
    /// Maps to `AV_PIX_FMT_BGRA`.
    Bgra8,
    /// Maps to `AV_PIX_FMT_NV12`.
    Nv12,
    /// Produced from both `AV_PIX_FMT_P010LE` and `AV_PIX_FMT_P010BE`; converts back to the
    /// little-endian variant.
    P010,
    /// Maps to `AV_PIX_FMT_VULKAN`.
    Vulkan,
    /// Maps to `AV_PIX_FMT_VIDEOTOOLBOX`.
    Metal,
    /// Any format without a dedicated variant; converts back to `AV_PIX_FMT_NONE`.
    Unknown,
}
51
/// Selects whether a [`VideoDecoder`] decodes in software or on a GPU backend.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeMode {
    /// Software decode; frames are retrieved with `receive_cpu_frame` / `receive_rgba_frame`.
    Cpu,
    /// Hardware decode on the given backend; `receive_cpu_frame` is rejected in this mode.
    Gpu(GpuBackend),
}
57
/// Selects how frames reach an encoder.
///
/// NOTE(review): no code in this part of the file consumes this type; the variant descriptions
/// below are inferred from their names and should be confirmed against the encoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EncodeMode {
    /// Presumably: frames are supplied from CPU memory.
    CpuUpload,
    /// Presumably: frames are supplied as textures of the given GPU backend.
    GpuTexture(GpuBackend),
}
63
/// A decoded video frame whose pixels live in CPU memory.
#[derive(Debug, Clone)]
pub struct CpuVideoFrame {
    /// Frame width in pixels.
    pub width: u32,
    /// Frame height in pixels.
    pub height: u32,
    /// Bytes per row of `data`; the decoder in this file always sets it to `width * 4`.
    pub stride: usize,
    /// Layout of `data`; the decoder in this file always produces [`PixelFormat::Rgba8`].
    pub pixel_format: PixelFormat,
    /// Presentation timestamp as reported by the decoded frame, if any.
    /// NOTE(review): presumably expressed in the stream time base — confirm.
    pub pts: Option<i64>,
    /// Pixel bytes, `stride * height` long when produced by the decoder in this file.
    pub data: Vec<u8>,
}
73
/// Options for [`VideoDecoder::open`].
#[derive(Debug, Clone, Copy)]
pub struct VideoDecoderConfig {
    /// Index of the input stream to decode.
    pub stream_index: usize,
    /// Software or hardware decoding.
    pub mode: DecodeMode,
}
79
80impl Default for VideoDecoderConfig {
81    fn default() -> Self {
82        Self {
83            stream_index: 0,
84            mode: DecodeMode::Cpu,
85        }
86    }
87}
88
/// Owns an FFmpeg codec context configured for decoding one video stream.
pub struct VideoDecoder {
    /// Index of the input stream this decoder accepts packets from.
    stream_index: usize,
    /// Whether the decoder was opened for software or hardware decode.
    mode: DecodeMode,
    /// Codec of the stream, derived from its FFmpeg codec id.
    codec: VideoCodec,
    /// Time base of the decoded stream.
    time_base: Rational,
    /// Codec context allocated in `open` and freed in `Drop`.
    context: *mut sys::AVCodecContext,
    /// swscale context for RGBA conversion; null until the first conversion.
    scaler: *mut sys::SwsContext,
    /// Hardware device reference kept alive for GPU decode; `None` in CPU mode.
    hw_device: Option<HwDeviceContext>,
}
98
// SAFETY (NOTE(review) — confirm): the raw pointers held by `VideoDecoder` are only reached
// through `&mut self` methods and `Drop`, so moving the decoder to another thread is assumed
// not to introduce shared access to the underlying FFmpeg objects.
unsafe impl Send for VideoDecoder {}
100
101impl VideoCodec {
102    pub(crate) fn from_av_codec_id(codec_id: sys::AVCodecID) -> Self {
103        match codec_id {
104            AV_CODEC_ID_H264 => Self::H264,
105            AV_CODEC_ID_HEVC => Self::Hevc,
106            AV_CODEC_ID_AV1 => Self::Av1,
107            AV_CODEC_ID_VP9 => Self::Vp9,
108            AV_CODEC_ID_PRORES => Self::ProRes,
109            _ => Self::Unknown,
110        }
111    }
112
113    pub(crate) fn to_av_codec_id(self) -> sys::AVCodecID {
114        match self {
115            Self::H264 => AV_CODEC_ID_H264,
116            Self::Hevc => AV_CODEC_ID_HEVC,
117            Self::Av1 => AV_CODEC_ID_AV1,
118            Self::Vp9 => AV_CODEC_ID_VP9,
119            Self::ProRes => AV_CODEC_ID_PRORES,
120            Self::Unknown => AV_CODEC_ID_NONE,
121        }
122    }
123}
124
125impl PixelFormat {
126    pub(crate) fn from_av_pixel_format(format: sys::AVPixelFormat) -> Self {
127        match format {
128            AV_PIX_FMT_RGBA => Self::Rgba8,
129            AV_PIX_FMT_CUDA => Self::Cuda,
130            AV_PIX_FMT_BGRA => Self::Bgra8,
131            AV_PIX_FMT_NV12 => Self::Nv12,
132            AV_PIX_FMT_P010LE | AV_PIX_FMT_P010BE => Self::P010,
133            AV_PIX_FMT_VULKAN => Self::Vulkan,
134            AV_PIX_FMT_VIDEOTOOLBOX => Self::Metal,
135            _ => Self::Unknown,
136        }
137    }
138
139    pub(crate) fn to_av_pixel_format(self) -> sys::AVPixelFormat {
140        match self {
141            Self::Rgba8 => AV_PIX_FMT_RGBA,
142            Self::Cuda => AV_PIX_FMT_CUDA,
143            Self::Bgra8 => AV_PIX_FMT_BGRA,
144            Self::Nv12 => AV_PIX_FMT_NV12,
145            Self::P010 => AV_PIX_FMT_P010LE,
146            Self::Vulkan => AV_PIX_FMT_VULKAN,
147            Self::Metal => AV_PIX_FMT_VIDEOTOOLBOX,
148            Self::Unknown => AV_PIX_FMT_NONE,
149        }
150    }
151}
152
153impl VideoDecoder {
154    pub fn open(input: &InputContext, config: VideoDecoderConfig) -> Result<Self> {
155        let parameters = input.stream_parameters(config.stream_index)?;
156        let codec_id = unsafe { (*parameters).codec_id };
157        let stable_codec = VideoCodec::from_av_codec_id(codec_id);
158        let codec = find_decoder(codec_id)?;
159        if let DecodeMode::Gpu(backend) = config.mode {
160            ensure_hardware_decoder(codec, stable_codec, backend)?;
161        }
162        let context = unsafe { sys::avcodec_alloc_context3(codec) };
163        if context.is_null() {
164            return Err(FfmpegError::new(
165                "avcodec_alloc_context3",
166                "failed to allocate video decoder context",
167            ));
168        }
169
170        let mut decoder = Self {
171            stream_index: config.stream_index,
172            mode: config.mode,
173            codec: stable_codec,
174            time_base: input.stream_time_base(config.stream_index)?.into(),
175            context,
176            scaler: ptr::null_mut(),
177            hw_device: None,
178        };
179
180        unsafe {
181            ffi::check(
182                sys::avcodec_parameters_to_context(decoder.context, parameters),
183                "avcodec_parameters_to_context",
184            )
185            .map_err(|error| {
186                error
187                    .with_codec(stable_codec)
188                    .with_stream_index(config.stream_index)
189            })?;
190            if matches!(config.mode, DecodeMode::Gpu(_)) {
191                (*decoder.context).thread_count = 1;
192                (*decoder.context).thread_type = 0;
193            } else {
194                (*decoder.context).thread_count = 0;
195                (*decoder.context).thread_type = sys::FF_THREAD_FRAME | sys::FF_THREAD_SLICE;
196            }
197        }
198
199        if let DecodeMode::Gpu(backend) = config.mode {
200            decoder.hw_device = Some(HwDeviceContext::create(backend)?);
201            unsafe {
202                (*decoder.context).hw_device_ctx =
203                    sys::av_buffer_ref(decoder.hw_device.as_ref().unwrap().ptr);
204                (*decoder.context).get_format = match backend {
205                    GpuBackend::Metal => Some(force_metal_pixel_format),
206                    GpuBackend::Vulkan => Some(force_vulkan_pixel_format),
207                    GpuBackend::Cuda => Some(force_cuda_pixel_format),
208                };
209            }
210        }
211
212        unsafe {
213            ffi::check(
214                sys::avcodec_open2(decoder.context, codec, ptr::null_mut()),
215                "avcodec_open2",
216            )
217            .map_err(|error| {
218                error
219                    .with_codec(stable_codec)
220                    .with_stream_index(config.stream_index)
221            })?;
222        }
223
224        Ok(decoder)
225    }
226
    /// Returns the codec of the stream this decoder was opened for.
    pub fn codec(&self) -> VideoCodec {
        self.codec
    }
230
    /// Returns the index of the input stream this decoder accepts packets from.
    pub fn stream_index(&self) -> usize {
        self.stream_index
    }
234
    /// Returns the time base of the decoded stream, captured when the decoder was opened.
    pub fn time_base(&self) -> Rational {
        self.time_base
    }
238
239    pub fn send_packet(&mut self, packet: &Packet) -> Result<()> {
240        if packet.stream_index() != self.stream_index {
241            return Ok(());
242        }
243        unsafe {
244            ffi::check(
245                sys::avcodec_send_packet(self.context, packet.inner.as_ptr()),
246                "avcodec_send_packet",
247            )
248            .map_err(|error| {
249                error
250                    .with_codec(self.codec)
251                    .with_stream_index(self.stream_index)
252            })
253        }
254    }
255
256    pub fn send_eof(&mut self) -> Result<()> {
257        unsafe {
258            ffi::check(
259                sys::avcodec_send_packet(self.context, ptr::null()),
260                "avcodec_send_packet",
261            )
262        }
263    }
264
    /// Calls `avcodec_flush_buffers` on the codec context.
    pub fn flush(&mut self) {
        unsafe { sys::avcodec_flush_buffers(self.context) };
    }
268
269    pub fn receive_cpu_frame(&mut self) -> Result<Option<CpuVideoFrame>> {
270        if let DecodeMode::Gpu(backend) = self.mode {
271            return Err(FfmpegError::new(
272                "receive_cpu_frame",
273                "hardware decoders produce GPU frames; create a CPU decoder to receive CPU bytes",
274            )
275            .with_backend(backend)
276            .with_codec(self.codec)
277            .with_stream_index(self.stream_index));
278        }
279        self.receive_rgba_frame()
280    }
281
282    /// Receives the next decoded frame as RGBA8.
283    ///
284    /// CPU decoders convert their native software frame with swscale. Hardware decoders first
285    /// transfer the decoded frame into software memory, then use the same RGBA8 conversion path.
286    /// This keeps callers independent from FFmpeg's platform-specific GPU frame handles while still
287    /// allowing decode itself to use VideoToolbox/NVDEC/Vulkan when available.
288    pub fn receive_rgba_frame(&mut self) -> Result<Option<CpuVideoFrame>> {
289        let mut frame = AvFrame::new()?;
290        match self.receive_frame(&mut frame)? {
291            ReceiveStatus::Again => Ok(None),
292            ReceiveStatus::Frame => self.frame_to_rgba(&frame).map(Some),
293        }
294    }
295
296    pub fn receive_gpu_frame(&mut self) -> Result<Option<GpuVideoFrame>> {
297        if self.mode == DecodeMode::Cpu {
298            return Err(FfmpegError::new(
299                "receive_gpu_frame",
300                "CPU decoders produce CPU frames; create a hardware decoder to receive GPU textures",
301            )
302            .with_codec(self.codec)
303            .with_stream_index(self.stream_index));
304        }
305        let mut frame = AvFrame::new()?;
306        match self.receive_frame(&mut frame)? {
307            ReceiveStatus::Again => Ok(None),
308            ReceiveStatus::Frame => self.frame_to_gpu(frame).map(Some),
309        }
310    }
311
312    fn receive_frame(&mut self, frame: &mut AvFrame) -> Result<ReceiveStatus> {
313        let result = unsafe { sys::avcodec_receive_frame(self.context, frame.as_mut_ptr()) };
314        if result == sys::AVERROR(libc::EAGAIN) || result == sys::AVERROR_EOF {
315            return Ok(ReceiveStatus::Again);
316        }
317        if result < 0 {
318            return Err(ffi::error_from_code("avcodec_receive_frame", result)
319                .with_codec(self.codec)
320                .with_stream_index(self.stream_index));
321        }
322        Ok(ReceiveStatus::Frame)
323    }
324
325    fn frame_to_rgba(&mut self, frame: &AvFrame) -> Result<CpuVideoFrame> {
326        let width = frame.width();
327        let height = frame.height();
328        let stride = width as usize * 4;
329        let mut data = vec![0; stride.saturating_mul(height as usize)];
330        let src_format = match frame.format() {
331            AV_PIX_FMT_CUDA | AV_PIX_FMT_VULKAN | AV_PIX_FMT_VIDEOTOOLBOX => {
332                let mut cpu_frame = AvFrame::new()?;
333                unsafe {
334                    ffi::check(
335                        sys::av_hwframe_transfer_data(cpu_frame.as_mut_ptr(), frame.as_ptr(), 0),
336                        "av_hwframe_transfer_data",
337                    )?;
338                }
339                return self.frame_to_rgba(&cpu_frame);
340            }
341            other => other,
342        };
343
344        if self.scaler.is_null() {
345            self.scaler = unsafe {
346                sys::sws_getContext(
347                    width as i32,
348                    height as i32,
349                    src_format,
350                    width as i32,
351                    height as i32,
352                    AV_PIX_FMT_RGBA,
353                    SWS_BILINEAR as i32,
354                    ptr::null_mut(),
355                    ptr::null_mut(),
356                    ptr::null(),
357                )
358            };
359            if self.scaler.is_null() {
360                return Err(FfmpegError::new(
361                    "sws_getContext",
362                    "failed to create RGBA conversion context",
363                ));
364            }
365        }
366
367        let mut dst_data = [
368            data.as_mut_ptr(),
369            ptr::null_mut(),
370            ptr::null_mut(),
371            ptr::null_mut(),
372        ];
373        let mut dst_stride = [stride as i32, 0, 0, 0];
374        unsafe {
375            sys::sws_scale(
376                self.scaler,
377                (*frame.as_ptr()).data.as_ptr() as *const *const u8,
378                (*frame.as_ptr()).linesize.as_ptr(),
379                0,
380                height as i32,
381                dst_data.as_mut_ptr(),
382                dst_stride.as_mut_ptr(),
383            );
384        }
385
386        Ok(CpuVideoFrame {
387            width,
388            height,
389            stride,
390            pixel_format: PixelFormat::Rgba8,
391            pts: frame.pts(),
392            data,
393        })
394    }
395
    /// Wraps a decoded hardware frame in the [`GpuVideoFrame`] variant for this decoder's backend.
    ///
    /// Each backend has a pair of `cfg`-gated arms: with the matching cargo feature enabled the
    /// frame is exported (CUDA, Metal) or rejected as unsupported (Vulkan); without the feature
    /// an error stating that the crate was built without it is returned. Any other combination
    /// of decode mode and frame format means the decoder did not hand back a hardware frame.
    fn frame_to_gpu(&self, frame: AvFrame) -> Result<GpuVideoFrame> {
        match (self.mode, frame.format()) {
            #[cfg(feature = "cuda")]
            (DecodeMode::Gpu(GpuBackend::Cuda), AV_PIX_FMT_CUDA) => self.frame_to_cuda(frame),
            #[cfg(not(feature = "cuda"))]
            (DecodeMode::Gpu(GpuBackend::Cuda), AV_PIX_FMT_CUDA) => Err(FfmpegError::new(
                "receive_gpu_frame",
                "crate was built without the cuda feature",
            )
            .with_backend(GpuBackend::Cuda)),
            #[cfg(feature = "metal")]
            (DecodeMode::Gpu(GpuBackend::Metal), AV_PIX_FMT_VIDEOTOOLBOX) => {
                self.frame_to_metal(&frame)
            }
            #[cfg(not(feature = "metal"))]
            (DecodeMode::Gpu(GpuBackend::Metal), AV_PIX_FMT_VIDEOTOOLBOX) => Err(FfmpegError::new(
                "receive_gpu_frame",
                "crate was built without the metal feature",
            )
            .with_backend(GpuBackend::Metal)),
            #[cfg(feature = "vulkan")]
            (DecodeMode::Gpu(GpuBackend::Vulkan), AV_PIX_FMT_VULKAN) => {
                // The frame is dropped unused: there is no export path for Vulkan yet.
                let _ = frame;
                Err(FfmpegError::new(
                    "receive_gpu_frame",
                    "ffmpeg-sys-next does not expose AVVkFrame, so decoded Vulkan image handles cannot be exported safely yet",
                )
                .with_backend(GpuBackend::Vulkan))
            }
            #[cfg(not(feature = "vulkan"))]
            (DecodeMode::Gpu(GpuBackend::Vulkan), AV_PIX_FMT_VULKAN) => Err(FfmpegError::new(
                "receive_gpu_frame",
                "crate was built without the vulkan feature",
            )
            .with_backend(GpuBackend::Vulkan)),
            // CPU mode, or a frame whose format does not match the configured backend.
            _ => Err(FfmpegError::new(
                "receive_gpu_frame",
                "decoder did not produce a hardware texture frame",
            )),
        }
    }
437
438    #[cfg(feature = "metal")]
439    fn frame_to_metal(&self, frame: &AvFrame) -> Result<GpuVideoFrame> {
440        let pixel_buffer = NonNull::new(frame.data(3).cast()).ok_or_else(|| {
441            FfmpegError::new(
442                "receive_gpu_frame",
443                "VideoToolbox frame did not contain a CVPixelBuffer",
444            )
445            .with_backend(GpuBackend::Metal)
446        })?;
447        Ok(GpuVideoFrame::Metal(unsafe {
448            crate::gpu::MetalDecodedFrame::retain_from_video_toolbox_frame(
449                pixel_buffer,
450                frame.pts(),
451            )
452        }))
453    }
454
455    #[cfg(feature = "cuda")]
456    fn frame_to_cuda(&self, frame: AvFrame) -> Result<GpuVideoFrame> {
457        let device_ptr = frame.data(0) as u64;
458        let pitch = frame.line_size(0);
459        let width = frame.width();
460        let height = frame.height();
461        let pts = frame.pts();
462        if device_ptr == 0 || pitch <= 0 {
463            return Err(FfmpegError::new(
464                "receive_gpu_frame",
465                "CUDA decoded frame did not contain a valid device pointer and pitch",
466            )
467            .with_backend(GpuBackend::Cuda));
468        }
469        let pixel_format = frame
470            .hw_sw_format()
471            .map(PixelFormat::from_av_pixel_format)
472            .unwrap_or(PixelFormat::Unknown);
473        Ok(GpuVideoFrame::Cuda(
474            crate::gpu::CudaDecodedFrame::from_av_frame(
475                frame,
476                device_ptr,
477                width,
478                height,
479                pitch as u64,
480                pixel_format,
481                pts,
482            ),
483        ))
484    }
485}
486
/// `get_format` callback installed for the Metal backend.
///
/// Picks `AV_PIX_FMT_VIDEOTOOLBOX` from the offered list, or `AV_PIX_FMT_NONE` if it is absent.
///
/// # Safety
///
/// `formats` must point to a list terminated by `AV_PIX_FMT_NONE`.
unsafe extern "C" fn force_metal_pixel_format(
    _context: *mut sys::AVCodecContext,
    formats: *const sys::AVPixelFormat,
) -> sys::AVPixelFormat {
    force_pixel_format(formats, AV_PIX_FMT_VIDEOTOOLBOX)
}
493
/// `get_format` callback installed for the Vulkan backend.
///
/// Picks `AV_PIX_FMT_VULKAN` from the offered list, or `AV_PIX_FMT_NONE` if it is absent.
///
/// # Safety
///
/// `formats` must point to a list terminated by `AV_PIX_FMT_NONE`.
unsafe extern "C" fn force_vulkan_pixel_format(
    _context: *mut sys::AVCodecContext,
    formats: *const sys::AVPixelFormat,
) -> sys::AVPixelFormat {
    force_pixel_format(formats, AV_PIX_FMT_VULKAN)
}
500
/// `get_format` callback installed for the CUDA backend.
///
/// Picks `AV_PIX_FMT_CUDA` from the offered list, or `AV_PIX_FMT_NONE` if it is absent.
///
/// # Safety
///
/// `formats` must point to a list terminated by `AV_PIX_FMT_NONE`.
unsafe extern "C" fn force_cuda_pixel_format(
    _context: *mut sys::AVCodecContext,
    formats: *const sys::AVPixelFormat,
) -> sys::AVPixelFormat {
    force_pixel_format(formats, AV_PIX_FMT_CUDA)
}
507
508fn force_pixel_format(
509    formats: *const sys::AVPixelFormat,
510    desired: sys::AVPixelFormat,
511) -> sys::AVPixelFormat {
512    let mut current = formats;
513    while unsafe { *current } != AV_PIX_FMT_NONE {
514        let format = unsafe { *current };
515        if format == desired {
516            return format;
517        }
518        current = unsafe { current.add(1) };
519    }
520    AV_PIX_FMT_NONE
521}
522
impl Drop for VideoDecoder {
    /// Frees the swscale context (if one was created) and then the codec context.
    fn drop(&mut self) {
        unsafe {
            // The scaler is created lazily, so it may still be null.
            if !self.scaler.is_null() {
                sys::sws_freeContext(self.scaler);
            }
            sys::avcodec_free_context(&mut self.context);
        }
    }
}
533
/// Outcome of asking the decoder for a frame.
enum ReceiveStatus {
    /// No frame was produced: the decoder reported `EAGAIN` or end of stream.
    Again,
    /// A frame was written into the caller's `AvFrame`.
    Frame,
}
538
/// Owns one reference to an FFmpeg hardware device context and releases it on drop.
struct HwDeviceContext {
    /// Buffer reference filled in by `av_hwdevice_ctx_create`.
    ptr: *mut sys::AVBufferRef,
}
542
543impl HwDeviceContext {
544    fn create(backend: GpuBackend) -> Result<Self> {
545        let device_type = hw_device_type(backend);
546        let mut ptr = ptr::null_mut();
547        unsafe {
548            ffi::check(
549                sys::av_hwdevice_ctx_create(&mut ptr, device_type, ptr::null(), ptr::null_mut(), 0),
550                "av_hwdevice_ctx_create",
551            )
552            .map_err(|error| error.with_backend(backend))?;
553        }
554        Ok(Self { ptr })
555    }
556}
557
/// Maps a GPU backend to the FFmpeg hardware device type used to decode on it.
fn hw_device_type(backend: GpuBackend) -> sys::AVHWDeviceType {
    match backend {
        GpuBackend::Metal => AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
        GpuBackend::Vulkan => AV_HWDEVICE_TYPE_VULKAN,
        GpuBackend::Cuda => AV_HWDEVICE_TYPE_CUDA,
    }
}
565
/// Maps a GPU backend to the FFmpeg hardware pixel format its decoded frames carry.
fn hw_pixel_format(backend: GpuBackend) -> sys::AVPixelFormat {
    match backend {
        GpuBackend::Metal => AV_PIX_FMT_VIDEOTOOLBOX,
        GpuBackend::Vulkan => AV_PIX_FMT_VULKAN,
        GpuBackend::Cuda => AV_PIX_FMT_CUDA,
    }
}
573
impl Drop for HwDeviceContext {
    /// Releases this wrapper's reference to the hardware device context.
    fn drop(&mut self) {
        unsafe { sys::av_buffer_unref(&mut self.ptr) };
    }
}
579
580fn find_decoder(codec_id: sys::AVCodecID) -> Result<*const sys::AVCodec> {
581    let decoder = unsafe { sys::avcodec_find_decoder(codec_id) };
582    if decoder.is_null() {
583        Err(FfmpegError::new(
584            "avcodec_find_decoder",
585            format!(
586                "no decoder found for {}",
587                crate::format::codec_name(codec_id)
588            ),
589        ))
590    } else {
591        Ok(decoder)
592    }
593}
594
595fn ensure_hardware_decoder(
596    decoder: *const sys::AVCodec,
597    codec: VideoCodec,
598    backend: GpuBackend,
599) -> Result<()> {
600    let device_type = hw_device_type(backend);
601    let pixel_format = hw_pixel_format(backend);
602    let mut index = 0;
603    loop {
604        let config = unsafe { sys::avcodec_get_hw_config(decoder, index) };
605        if config.is_null() {
606            break;
607        }
608        let supports_device_context = unsafe {
609            ((*config).methods & sys::AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX as i32) != 0
610        };
611        let matches_backend =
612            unsafe { (*config).device_type == device_type && (*config).pix_fmt == pixel_format };
613        if supports_device_context && matches_backend {
614            return Ok(());
615        }
616        index += 1;
617    }
618
619    Err(FfmpegError::new(
620        "VideoDecoder::open",
621        format!("{backend:?} hardware decode is unavailable for {codec:?}"),
622    )
623    .with_backend(backend)
624    .with_codec(codec))
625}