Skip to main content

vk_video/
vulkan_video.rs

/// Capability-related types, re-exported in one place.
///
/// Contains `AdapterInfo` from `crate::adapter`, the decode/encode capability
/// types from `crate::device::caps`, and `ash::vk::PhysicalDeviceType`, which is
/// re-exported under the alias `VulkanDeviceType`.
pub mod capabilities {
    pub use crate::adapter::AdapterInfo;
    pub use crate::device::caps::{
        DecodeCapabilities, DecodeH264Capabilities, DecodeH264ProfileCapabilities,
        EncodeCapabilities, EncodeH264Capabilities, EncodeH264ProfileCapabilities,
    };

    // Alias for the `ash` physical device type enum.
    pub use ash::vk::PhysicalDeviceType as VulkanDeviceType;
}
10
11pub mod parameters {
12    pub use crate::adapter::VulkanAdapterDescriptor;
13    pub use crate::device::{
14        ColorRange, ColorSpace, DecoderParameters, EncoderOutputParameters, EncoderParameters,
15        MissedFrameHandling, Rational, VideoParameters, VulkanDeviceDescriptor,
16    };
17    pub use crate::vulkan_encoder::RateControl;
18    #[cfg(feature = "transcoder")]
19    pub use crate::vulkan_transcoder::{TranscoderOutputParameters, TranscoderParameters};
20
21    #[cfg(feature = "wgpu")]
22    pub use crate::wgpu_helpers::WgpuConverterParameters;
23
24    pub use ash::vk::VideoDecodeUsageFlagsKHR as DecoderUsageFlags;
25
26    pub use ash::vk::VideoEncodeContentFlagsKHR as EncoderContentFlags;
27    pub use ash::vk::VideoEncodeTuningModeKHR as EncoderTuningMode;
28    pub use ash::vk::VideoEncodeUsageFlagsKHR as EncoderUsageFlags;
29
30    /// Scaling algorithm used when resizing frames in the transcoder.
31    #[derive(Debug, Clone, Copy, Default)]
32    #[repr(u32)]
33    pub enum ScalingAlgorithm {
34        NearestNeighbor,
35        #[default]
36        Bilinear,
37        Lanczos3,
38    }
39
40    /// A profile in H264 is a set of codec features used while encoding a specific video.
41    /// Baseline uses the fewest features, Main can use more and High even more than Main.
42    #[derive(Debug, Clone, Copy)]
43    pub enum H264Profile {
44        Baseline,
45        Main,
46        High,
47    }
48
49    impl H264Profile {
50        pub(crate) fn to_profile_idc(self) -> ash::vk::native::StdVideoH264ProfileIdc {
51            match self {
52                H264Profile::Baseline => {
53                    ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_BASELINE
54                }
55                H264Profile::Main => {
56                    ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_MAIN
57                }
58                H264Profile::High => {
59                    ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_HIGH
60                }
61            }
62        }
63    }
64}
65
66#[cfg(feature = "wgpu")]
67mod wgpu_api;
68#[cfg(feature = "wgpu")]
69pub use wgpu_api::*;
70
71use crate::device::{ColorRange, ColorSpace};
72use crate::parser::h264::AccessUnit;
73use crate::vulkan_decoder::{FrameSorter, VulkanDecoder};
74use ash::vk;
75
76pub use crate::adapter::VulkanAdapter;
77pub use crate::device::VulkanDevice;
78pub use crate::instance::VulkanInstance;
79pub use crate::parser::{h264::H264ParserError, reference_manager::ReferenceManagementError};
80pub use crate::vulkan_decoder::VulkanDecoderError;
81pub use crate::vulkan_encoder::VulkanEncoderError;
82#[cfg(feature = "transcoder")]
83pub use crate::vulkan_transcoder::{Transcoder, TranscoderError};
84
85#[cfg(feature = "wgpu")]
86pub use crate::wgpu_helpers::{
87    WgpuConverterInitError, WgpuNv12ToRgbaConverter, WgpuRgbaToNv12Converter,
88};
89
90use crate::parser::{
91    decoder_instructions::compile_to_decoder_instructions, h264::H264Parser,
92    reference_manager::ReferenceContext,
93};
94use crate::vulkan_encoder::VulkanEncoder;
95use crate::wrappers::ImageKey;
96
/// Error type returned by the decoding API in this module.
///
/// Every variant wraps one underlying error type. The `#[from]` attributes generate
/// the matching `From` conversions, so the wrapped errors can be propagated with `?`.
#[derive(Debug, thiserror::Error)]
pub enum DecoderError {
    /// An error reported by the Vulkan decoder.
    #[error("Decoder error: {0}")]
    VulkanDecoderError(#[from] VulkanDecoderError),

    /// An error reported by the H264 parser.
    #[error("H264 parser error: {0}")]
    ParserError(#[from] H264ParserError),

    /// An error reported by reference management.
    #[error("Reference management error: {0}")]
    ReferenceManagementError(#[from] ReferenceManagementError),
}
108
/// Errors reported while setting up Vulkan.
///
/// Variants carrying `#[from]` can be created from the wrapped error with `?`.
#[derive(thiserror::Error, Debug)]
pub enum VulkanInitError {
    /// The Vulkan library could not be loaded (wraps `ash::LoadingError`).
    #[error("Error loading vulkan: {0}")]
    LoadingError(#[from] ash::LoadingError),

    /// A Vulkan call returned an error code.
    #[error("Vulkan error: {0}")]
    VkError(#[from] vk::Result),

    /// A wgpu initialization error. Only present with the `wgpu` feature.
    /// `transparent` forwards `Display` and `source` to the wrapped error.
    #[cfg(feature = "wgpu")]
    #[error(transparent)]
    WgpuError(#[from] WgpuInitError),

    /// No suitable physical device was found.
    #[error("Cannot find a suitable physical device")]
    NoDevice,

    /// Building a string from bytes failed because no nul terminator was found
    /// (wraps `std::ffi::FromBytesUntilNulError`).
    #[error("String conversion error: {0}")]
    StringConversionError(#[from] std::ffi::FromBytesUntilNulError),

    /// The profile does not support the NV12 texture format.
    #[error("Profile does not support NV12 texture format")]
    NoNV12ProfileSupport,
}
130
/// Errors shared by the Vulkan-based components of this crate.
#[derive(thiserror::Error, Debug)]
pub enum VulkanCommonError {
    /// A Vulkan call returned an error code.
    #[error("Vulkan error: {0}")]
    VkError(#[from] vk::Result),

    /// No queue exists with the given index.
    #[error("Cannot find a queue with index {0}")]
    NoQueue(usize),

    /// A memory copy targeted a buffer that is not set up for receiving input.
    #[error("Memory copy requested to a buffer that is not set up for receiving input")]
    UploadToImproperBuffer,

    /// A Decoded Pictures Buffer (DPB) slot was requested while all slots were taken.
    #[error("A slot in the Decoded Pictures Buffer was requested, but all slots are taken")]
    NoFreeSlotsInDpb,

    /// More DPB slots were requested than the maximum of 32; carries the requested count.
    #[error("DPB can have at most 32 slots, {0} was requested")]
    DpbTooLong(u32),

    /// A wait was attempted on a semaphore value that was not signaled.
    #[error("Tried to wait for an unsignaled semaphore value")]
    SemaphoreWaitOnUnsignaledValue,

    /// The image with this key was registered as new although it already exists.
    #[error("Tried to register {0:x?} as a new image, while it already exists")]
    RegisteredNewImageTwice(ImageKey),

    /// State was requested for an image key that does not exist.
    #[error("Tried to access state of image {0:x?}, which does not exist")]
    TriedToAccessNonexistentImageState(ImageKey),

    /// An image key was unregistered without having been registered.
    #[error("Tried to unregister image {0:x?} that was not registered")]
    UnregisteredNonexistentImage(ImageKey),

    /// The given image aspect flags are not supported.
    #[error("Unsupported image aspect: {0:?}")]
    UnsupportedImageAspect(vk::ImageAspectFlags),
}
163
/// Represents a chunk of encoded video data used for decoding.
///
/// `pts` is the presentation timestamp -- a number, which describes when the given frame
/// should be presented, used for synchronization with other tracks, e.g. with audio
///
/// If `pts` is [`Option::Some`], it is inferred that the chunk contains bytestream that belongs to
/// one output frame.
/// If `pts` is [`Option::None`], the chunk can contain bytestream from multiple consecutive
/// frames.
///
/// The struct only holds a borrowed slice and a timestamp, so it is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct EncodedInputChunk<'a> {
    /// The encoded bytes of this chunk.
    pub data: &'a [u8],
    /// Presentation timestamp, see the type-level documentation.
    pub pts: Option<u64>,
}
177
/// Represents all events that can be sent to the decoder.
///
/// The enum is `#[non_exhaustive]`: code outside this crate that matches on it
/// needs a wildcard arm, which allows new events to be added later.
#[non_exhaustive]
pub enum DecoderEvent<'a> {
    /// Submit an encoded chunk for decoding.
    DecodeChunk(EncodedInputChunk<'a>),

    /// Signal the end of the current frame and flush any buffered bitstream units in the parser.
    ///
    /// You should send this event only if you need to minimize the codec parsing latency.
    /// The decoder does not require it to work.
    ///
    /// Send this only after submitting all bitstream units belonging to a single frame.
    /// Any incomplete bitstream units buffered in the parser will be flushed and decoded,
    /// which may lead to artifacts.
    SignalFrameEnd,

    /// Signal the decoder that a chunk of the bitstream was lost.
    ///
    /// How the decoder reacts depends on the configured [`parameters::MissedFrameHandling`].
    SignalDataLoss,

    /// Flush all frames from the decoder.
    ///
    /// Make sure that this is done when you have the knowledge that no more frames will be coming
    /// that need to be presented before the already decoded frames.
    Flush,
}
205
/// Represents a chunk of encoded video data returned by the encoder.
///
/// `pts` is the presentation timestamp -- a number, which describes when the given frame
/// should be presented, used for synchronization with other tracks, e.g. with audio
///
/// `Debug` and `Clone` are available whenever the payload type `T` implements them.
#[derive(Debug, Clone)]
pub struct EncodedOutputChunk<T> {
    /// The encoded payload.
    pub data: T,
    /// Presentation timestamp, if one is attached to this chunk.
    pub pts: Option<u64>,
    /// `true` when this chunk is a keyframe.
    pub is_keyframe: bool,
}
215
/// Represents a frame to be encoded.
///
/// `Debug` and `Clone` are available whenever the payload type `T` implements them.
#[derive(Debug, Clone)]
pub struct InputFrame<T> {
    /// The frame payload handed to the encoder.
    pub data: T,
    /// Presentation timestamp, if one is attached to this frame.
    pub pts: Option<u64>,
}
221
/// Additional information about the decoded frame.
pub struct FrameMetadata {
    /// Presentation timestamp of the frame, if one is available.
    pub pts: Option<u64>,
    /// Color space of the frame.
    pub color_space: ColorSpace,
    /// Color range of the frame.
    pub color_range: ColorRange,
}
228
/// Represents a single decoded frame.
///
/// `T` is the payload type, e.g. [`RawFrameData`] for frames produced by [`BytesDecoder`].
pub struct OutputFrame<T> {
    /// The decoded frame payload.
    pub data: T,
    /// Additional information about the frame, see [`FrameMetadata`].
    pub metadata: FrameMetadata,
}
234
/// A frame stored as raw bytes together with its dimensions.
///
/// This is the payload produced by [`BytesDecoder`] and consumed by [`BytesEncoder`].
#[derive(Clone)]
pub struct RawFrameData {
    /// The raw frame bytes.
    pub frame: Vec<u8>,
    /// Frame width (presumably in pixels -- confirm against the decoder/encoder).
    pub width: u32,
    /// Frame height (presumably in pixels -- confirm against the decoder/encoder).
    pub height: u32,
}

// Implemented by hand rather than derived: a derived impl would print every byte of
// `frame`, which makes logs unusable for real frames. Only the buffer length is shown.
impl std::fmt::Debug for RawFrameData {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("RawFrameData")
            .field("frame_len", &self.frame.len())
            .field("width", &self.width)
            .field("height", &self.height)
            .finish()
    }
}
240
/// A decoder that outputs frames stored as [`Vec<u8>`] with the raw pixel data.
pub struct BytesDecoder {
    /// Executes the compiled decoder instructions and returns the decoded bytes.
    pub(crate) vulkan_decoder: VulkanDecoder<'static>,
    /// Turns incoming encoded bytes into access units; may buffer data until flushed.
    pub(crate) parser: H264Parser,
    /// Reference state used when compiling access units into decoder instructions;
    /// also told about missed frames on data loss.
    pub(crate) reference_ctx: ReferenceContext,
    /// Receives decoded frames and hands them back for output; can hold frames until
    /// flushed (presumably to restore presentation order -- confirm in `FrameSorter`).
    pub(crate) frame_sorter: FrameSorter<RawFrameData>,
}
248
249impl BytesDecoder {
250    /// The result is a sequence of frames. The payload of each [`OutputFrame`] struct is a [`Vec<u8>`]. Each [`Vec<u8>`] contains a single
251    /// decoded frame in the [NV12 format](https://en.wikipedia.org/wiki/YCbCr#4:2:0).
252    pub fn decode(
253        &mut self,
254        frame: EncodedInputChunk<'_>,
255    ) -> Result<Vec<OutputFrame<RawFrameData>>, DecoderError> {
256        self.process_event(DecoderEvent::DecodeChunk(frame))
257    }
258
259    /// Flush all frames from the decoder.
260    ///
261    /// Make sure that this is done when you have the knowledge that no more frames will be coming
262    /// that need to be presented before the already decoded frames.
263    pub fn flush(&mut self) -> Result<Vec<OutputFrame<RawFrameData>>, DecoderError> {
264        self.process_event(DecoderEvent::Flush)
265    }
266
267    /// Process a [`DecoderEvent`]. For most use cases, using [`Self::decode`] and [`Self::flush`] is enough.
268    /// Use this only when you need more fine-grained control.
269    /// May return a sequence of decoded frames in the [NV12 format](https://en.wikipedia.org/wiki/YCbCr#4:2:0).
270    pub fn process_event(
271        &mut self,
272        event: DecoderEvent<'_>,
273    ) -> Result<Vec<OutputFrame<RawFrameData>>, DecoderError> {
274        match event {
275            DecoderEvent::DecodeChunk(chunk) => {
276                let nalus = self.parser.parse(chunk.data, chunk.pts)?;
277                self.decode_access_units(nalus)
278            }
279            DecoderEvent::SignalFrameEnd => {
280                let access_units = self.parser.flush()?;
281                self.decode_access_units(access_units)
282            }
283            DecoderEvent::SignalDataLoss => {
284                self.reference_ctx.mark_missed_frames();
285                Ok(Vec::new())
286            }
287            DecoderEvent::Flush => {
288                let access_units = self.parser.flush()?;
289                let mut frames = self.decode_access_units(access_units)?;
290                frames.append(&mut self.frame_sorter.flush());
291                Ok(frames)
292            }
293        }
294    }
295
296    fn decode_access_units(
297        &mut self,
298        access_units: Vec<AccessUnit>,
299    ) -> Result<Vec<OutputFrame<RawFrameData>>, DecoderError> {
300        let instructions = compile_to_decoder_instructions(&mut self.reference_ctx, access_units)?;
301        let unsorted_frames = self.vulkan_decoder.decode_to_bytes(&instructions)?;
302        let sorted_frames = self.frame_sorter.put_frames(unsorted_frames);
303        Ok(sorted_frames)
304    }
305}
306
/// An encoder that takes input frames as [`Vec<u8>`] with raw pixel data (in NV12)
pub struct BytesEncoder {
    /// The underlying Vulkan encoder that all methods delegate to.
    pub(crate) vulkan_encoder: VulkanEncoder<'static>,
}
311
312impl BytesEncoder {
313    /// The result is a chunk of H264 bytecode.
314    ///
315    /// If the `force_keyframe` option is set to `true`, the encoder will encode this frame as a
316    /// [keyframe](https://en.wikipedia.org/wiki/Video_compression_picture_types#Intra-coded_(I)_frames/slices_(key_frames)).
317    /// Otherwise, the encoder will decide which frames should be coded this way.
318    pub fn encode(
319        &mut self,
320        frame: &InputFrame<RawFrameData>,
321        force_keyframe: bool,
322    ) -> Result<EncodedOutputChunk<Vec<u8>>, VulkanEncoderError> {
323        self.vulkan_encoder.encode_bytes(frame, force_keyframe)
324    }
325
326    /// Retrieve encoded SPS NAL units from the video session parameters, in Annex B.
327    ///
328    /// Useful when `inline_stream_params` is `false` and the parameters need to be
329    /// sent out-of-band (e.g. in RTMP or MP4 headers).
330    pub fn sps(&self) -> Result<Vec<u8>, VulkanEncoderError> {
331        self.vulkan_encoder.stream_parameters(true, false)
332    }
333
334    /// Retrieve encoded PPS NAL units from the video session parameters, in Annex B.
335    ///
336    /// Useful when `inline_stream_params` is `false` and the parameters need to be
337    /// sent out-of-band (e.g. in RTMP or MP4 headers).
338    pub fn pps(&self) -> Result<Vec<u8>, VulkanEncoderError> {
339        self.vulkan_encoder.stream_parameters(false, true)
340    }
341}