cros_codecs/backend/vaapi/
encoder.rs

1// Copyright 2024 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::any::Any;
6use std::marker::PhantomData;
7use std::rc::Rc;
8
9use libva::Config;
10use libva::Context;
11use libva::Display;
12use libva::EncCodedBuffer;
13use libva::MappedCodedBuffer;
14use libva::Picture;
15use libva::PictureEnd;
16use libva::Surface;
17use libva::SurfaceMemoryDescriptor;
18use libva::UsageHint;
19use libva::VAEntrypoint::VAEntrypointEncSlice;
20use libva::VAEntrypoint::VAEntrypointEncSliceLP;
21use libva::VAProfile;
22use libva::VASurfaceStatus;
23
24use crate::backend::vaapi::surface_pool::PooledVaSurface;
25use crate::backend::vaapi::surface_pool::VaSurfacePool;
26use crate::backend::vaapi::FORMAT_MAP;
27use crate::decoder::FramePool;
28use crate::encoder::stateless::BackendPromise;
29use crate::encoder::stateless::StatelessBackendError;
30use crate::encoder::stateless::StatelessBackendResult;
31use crate::encoder::stateless::StatelessEncoderBackendImport;
32use crate::encoder::FrameMetadata;
33use crate::encoder::RateControl;
34use crate::encoder::Tunings;
35use crate::Fourcc;
36use crate::Resolution;
37
/// Number of scratch frames the encoder backend allocates when its pool is created.
const INITIAL_SCRATCH_POOL_SIZE: usize = 16;
/// Upper bound on the number of frames in the scratch pool. Once it is reached the backend
/// refuses to allocate more scratch frames.
const MAX_SCRATCH_POOL_SIZE: usize = 4 * INITIAL_SCRATCH_POOL_SIZE;
43
44impl From<libva::VaError> for StatelessBackendError {
45    fn from(value: libva::VaError) -> Self {
46        Self::Other(value.into())
47    }
48}
49
50pub(crate) fn tunings_to_libva_rc<const CLAMP_MIN_QP: u32, const CLAMP_MAX_QP: u32>(
51    tunings: &Tunings,
52) -> StatelessBackendResult<libva::EncMiscParameterRateControl> {
53    let bits_per_second = tunings.rate_control.bitrate_target().unwrap_or(0);
54    let bits_per_second = u32::try_from(bits_per_second).map_err(|e| anyhow::anyhow!(e))?;
55
56    // At the moment we don't support variable bitrate therefore target 100%
57    const TARGET_PERCENTAGE: u32 = 100;
58
59    // Window size in ms that the RC should apply to
60    const WINDOW_SIZE: u32 = 1_500;
61
62    // Clamp minium QP
63    let min_qp = tunings.min_quality.clamp(CLAMP_MIN_QP, CLAMP_MAX_QP);
64
65    let basic_unit_size = 0;
66
67    // Don't reset the rate controller
68    const RESET: u32 = 0;
69
70    // Don't skip frames
71    const DISABLE_FRAME_SKIP: u32 = 1;
72
73    // Allow bit stuffing
74    const DISABLE_BIT_STUFFING: u32 = 0;
75
76    // Use default
77    const MB_RATE_CONTROL: u32 = 0;
78
79    // SVC encoding is not supported for now
80    const TEMPORAL_ID: u32 = 0;
81
82    // Don't ensure intraframe size
83    const CFS_I_FRAMES: u32 = 0;
84
85    // We don't use hierarchical B frames currently
86    const ENABLE_PARALLEL_BRC: u32 = 0;
87
88    // Disable dynamic scaling
89    const ENABLE_DYNAMIC_SCALING: u32 = 0;
90
91    // Use default tolerance mode
92    const FRAME_TOLERANCE_MODE: u32 = 0;
93
94    // ICQ mode is not used
95    const ICQ_QUALITY_FACTOR: u32 = 0;
96
97    // Clamp maximum QP
98    let max_qp = tunings.max_quality.clamp(CLAMP_MIN_QP, CLAMP_MAX_QP);
99
100    // Unsed
101    const QUALITY_FACTOR: u32 = 0;
102
103    // No limits
104    const TARGET_FRAME_SIZE: u32 = 0;
105
106    // If ConstantQuality is used then set to it's value, otherwise use middle
107    let initial_qp = match tunings.rate_control {
108        RateControl::ConstantQuality(qp) => qp.clamp(min_qp, max_qp),
109        _ => (min_qp + max_qp) / 2,
110    };
111
112    Ok(libva::EncMiscParameterRateControl::new(
113        bits_per_second,
114        TARGET_PERCENTAGE,
115        WINDOW_SIZE,
116        initial_qp,
117        min_qp,
118        basic_unit_size,
119        libva::RcFlags::new(
120            RESET,
121            DISABLE_FRAME_SKIP,
122            DISABLE_BIT_STUFFING,
123            MB_RATE_CONTROL,
124            TEMPORAL_ID,
125            CFS_I_FRAMES,
126            ENABLE_PARALLEL_BRC,
127            ENABLE_DYNAMIC_SCALING,
128            FRAME_TOLERANCE_MODE,
129        ),
130        ICQ_QUALITY_FACTOR,
131        max_qp,
132        QUALITY_FACTOR,
133        TARGET_FRAME_SIZE,
134    ))
135}
136
137pub struct Reconstructed(PooledVaSurface<()>);
138
139impl Reconstructed {
140    pub(crate) fn surface(&self) -> &Surface<()> {
141        use std::borrow::Borrow;
142        Borrow::<Surface<()>>::borrow(&self.0)
143    }
144
145    pub(crate) fn surface_id(&self) -> u32 {
146        self.surface().id()
147    }
148}
149
150pub struct VaapiBackend<M, H>
151where
152    M: SurfaceMemoryDescriptor,
153    H: std::borrow::Borrow<Surface<M>> + 'static,
154{
155    /// VA config.
156    #[allow(dead_code)]
157    va_config: Config,
158
159    /// VA context used for encoding.
160    context: Rc<Context>,
161
162    _va_profile: VAProfile::Type,
163    scratch_pool: VaSurfacePool<()>,
164    _phantom: PhantomData<(M, H)>,
165}
166
167impl<M, H> VaapiBackend<M, H>
168where
169    M: SurfaceMemoryDescriptor,
170    H: std::borrow::Borrow<Surface<M>>,
171{
172    pub fn new(
173        display: Rc<Display>,
174        va_profile: VAProfile::Type,
175        fourcc: Fourcc,
176        coded_size: Resolution,
177        bitrate_control: u32,
178        low_power: bool,
179    ) -> StatelessBackendResult<Self> {
180        let format_map = FORMAT_MAP
181            .iter()
182            .find(|&map| map.va_fourcc == fourcc.0)
183            .ok_or_else(|| StatelessBackendError::UnsupportedFormat)?;
184
185        let rt_format = format_map.rt_format;
186
187        let va_config = display.create_config(
188            vec![
189                libva::VAConfigAttrib {
190                    type_: libva::VAConfigAttribType::VAConfigAttribRTFormat,
191                    value: rt_format,
192                },
193                libva::VAConfigAttrib {
194                    type_: libva::VAConfigAttribType::VAConfigAttribRateControl,
195                    value: bitrate_control,
196                },
197            ],
198            va_profile,
199            if low_power {
200                VAEntrypointEncSliceLP
201            } else {
202                VAEntrypointEncSlice
203            },
204        )?;
205
206        let context = display.create_context::<M>(
207            &va_config,
208            coded_size.width,
209            coded_size.height,
210            None,
211            true,
212        )?;
213
214        let mut scratch_pool = VaSurfacePool::new(
215            Rc::clone(&display),
216            rt_format,
217            Some(UsageHint::USAGE_HINT_ENCODER),
218            coded_size,
219        );
220
221        // TODO: Allow initial size to be changed
222        scratch_pool.add_frames(vec![(); INITIAL_SCRATCH_POOL_SIZE])?;
223
224        Ok(Self {
225            va_config,
226            context,
227            scratch_pool,
228            _va_profile: va_profile,
229            _phantom: Default::default(),
230        })
231    }
232
233    pub(crate) fn context(&self) -> &Rc<Context> {
234        &self.context
235    }
236
237    pub(crate) fn new_coded_buffer(
238        &self,
239        rate_control: &RateControl,
240    ) -> StatelessBackendResult<EncCodedBuffer> {
241        // Coded buffer size multiplier. It's inteded to give head room for the encoder.
242        const CODED_SIZE_MUL: usize = 2;
243
244        // Default coded buffer size if bitrate control is not used.
245        const DEFAULT_CODED_SIZE: usize = 1_500_000;
246
247        let coded_size = rate_control
248            .bitrate_target()
249            .map(|e| e as usize * CODED_SIZE_MUL)
250            .unwrap_or(DEFAULT_CODED_SIZE);
251
252        Ok(self.context().create_enc_coded(coded_size)?)
253    }
254
255    // Creates an empty surface that will be filled with reconstructed picture during encoding
256    // which will be later used as frame reference
257    pub(crate) fn new_scratch_picture(&mut self) -> StatelessBackendResult<Reconstructed> {
258        if self.scratch_pool.num_free_frames() == 0 {
259            if self.scratch_pool.num_managed_frames() >= MAX_SCRATCH_POOL_SIZE {
260                log::error!("Scratch pool is exhausted and hit the size limit");
261                return Err(StatelessBackendError::OutOfResources);
262            }
263
264            log::debug!(
265                "Scratch pool empty, allocating one more surface. (previous pool size: {})",
266                self.scratch_pool.num_managed_frames()
267            );
268            self.scratch_pool.add_frames(vec![()])?;
269        }
270
271        let surface = self
272            .scratch_pool
273            .get_surface()
274            .ok_or(StatelessBackendError::OutOfResources)?;
275
276        Ok(Reconstructed(surface))
277    }
278}
279
280impl<M, Handle> StatelessEncoderBackendImport<Handle, Handle> for VaapiBackend<M, Handle>
281where
282    M: SurfaceMemoryDescriptor,
283    Handle: std::borrow::Borrow<Surface<M>>,
284{
285    fn import_picture(
286        &mut self,
287        _metadata: &FrameMetadata,
288        handle: Handle,
289    ) -> StatelessBackendResult<Handle> {
290        Ok(handle)
291    }
292}
293
294impl<M> StatelessEncoderBackendImport<M, Surface<M>> for VaapiBackend<M, Surface<M>>
295where
296    M: libva::SurfaceMemoryDescriptor,
297{
298    fn import_picture(
299        &mut self,
300        meta: &FrameMetadata,
301        handle: M,
302    ) -> StatelessBackendResult<Surface<M>> {
303        let fourcc = meta.layout.format.0 .0;
304
305        let format_map = FORMAT_MAP
306            .iter()
307            .find(|&map| map.va_fourcc == fourcc)
308            .ok_or_else(|| StatelessBackendError::UnsupportedFormat)?;
309
310        log::debug!("Creating new surface for meta={meta:#?}");
311        let mut surfaces = self.context.display().create_surfaces(
312            format_map.rt_format,
313            Some(format_map.va_fourcc),
314            meta.layout.size.width,
315            meta.layout.size.height,
316            Some(UsageHint::USAGE_HINT_ENCODER),
317            vec![handle],
318        )?;
319
320        surfaces.pop().ok_or(StatelessBackendError::OutOfResources)
321    }
322}
323
324/// Vaapi's implementation of [`crate::encoder::stateless::BackendPromise`]
325pub struct CodedOutputPromise<M, P>
326where
327    M: SurfaceMemoryDescriptor,
328    P: std::borrow::Borrow<Surface<M>>,
329{
330    /// Currently processed picture/surface.
331    handle: Picture<PictureEnd, P>,
332
333    /// Hold reference frames/object from being dropped while `handle` is processed.
334    references: Vec<Rc<dyn Any>>,
335
336    // VaBuffer where the coded output will be present after processing
337    // is finished.
338    coded_buf: EncCodedBuffer,
339
340    /// Container for the request output. Moved from
341    /// [`crate::encoder::stateless::StatelessVideoEncoderBackend`] request. The output will be
342    /// appended to it.
343    coded_output: Vec<u8>,
344
345    _phantom: PhantomData<M>,
346}
347
348impl<M, P> CodedOutputPromise<M, P>
349where
350    M: SurfaceMemoryDescriptor,
351    P: std::borrow::Borrow<Surface<M>>,
352{
353    pub fn new(
354        handle: Picture<PictureEnd, P>,
355        references: Vec<Rc<dyn Any>>,
356        coded_buf: EncCodedBuffer,
357        coded_output: Vec<u8>,
358    ) -> Self {
359        Self {
360            handle,
361            references,
362            coded_buf,
363            coded_output,
364            _phantom: Default::default(),
365        }
366    }
367}
368
369impl<M, H> BackendPromise for CodedOutputPromise<M, H>
370where
371    M: SurfaceMemoryDescriptor,
372    H: std::borrow::Borrow<Surface<M>>,
373{
374    type Output = Vec<u8>;
375
376    fn sync(mut self) -> StatelessBackendResult<Self::Output> {
377        if let Err((err, _)) = self.handle.sync() {
378            // TODO consider going back to PictureEnd
379            return Err(err.into());
380        }
381
382        // Drop all references as processing is finished
383        self.references.clear();
384
385        // Map coded buffer and collect bitstream
386        let coded = MappedCodedBuffer::new(&self.coded_buf)?;
387        let mut bitstream = self.coded_output;
388        for segment in coded.segments() {
389            // TODO: Handle flags?
390            // NOTE: on flags: 0-7 bits are average QP value
391            if segment.bit_offset > 0 {
392                log::warn!("unsupported bit_offset != 0 (yet)");
393            }
394            bitstream.extend(segment.buf)
395        }
396
397        Ok(bitstream)
398    }
399
400    fn is_ready(&self) -> bool {
401        match self.handle.surface().query_status() {
402            Ok(status) => status == VASurfaceStatus::VASurfaceReady,
403            Err(_) => {
404                // An error occurred while processing or checking the status of the underlying
405                // processing, in both cases consider it is done. In either cases it will be
406                // returned with [`sync`].
407                true
408            }
409        }
410    }
411}
412
413#[cfg(test)]
414pub(crate) mod tests {
415    use std::borrow::Borrow;
416
417    use libva::constants::VA_FOURCC_NV12;
418    use libva::constants::VA_FOURCC_P010;
419
420    use super::*;
421    use crate::encoder::tests::fill_test_frame_nv12;
422    use crate::encoder::tests::fill_test_frame_p010;
423    use crate::encoder::tests::get_test_frame_t;
424    use crate::encoder::FrameMetadata;
425    use crate::FrameLayout;
426
427    fn map_surface<'a, M: SurfaceMemoryDescriptor>(
428        display: &Rc<Display>,
429        surface: &'a Surface<M>,
430        fourcc: u32,
431    ) -> libva::Image<'a> {
432        let image_fmts = display.query_image_formats().unwrap();
433        let image_fmt = image_fmts.into_iter().find(|f| f.fourcc == fourcc).unwrap();
434
435        libva::Image::create_from(surface, image_fmt, surface.size(), surface.size()).unwrap()
436    }
437
438    fn map_surface_nv12<'a, M: SurfaceMemoryDescriptor>(
439        display: &Rc<Display>,
440        surface: &'a Surface<M>,
441    ) -> libva::Image<'a> {
442        map_surface(display, surface, VA_FOURCC_NV12)
443    }
444
445    fn map_surface_p010<'a, M: SurfaceMemoryDescriptor>(
446        display: &Rc<Display>,
447        surface: &'a Surface<M>,
448    ) -> libva::Image<'a> {
449        map_surface(display, surface, VA_FOURCC_P010)
450    }
451
452    /// Uploads raw NV12 to Surface
453    pub fn upload_nv12_img<M: SurfaceMemoryDescriptor>(
454        display: &Rc<Display>,
455        surface: &Surface<M>,
456        width: u32,
457        height: u32,
458        data: &[u8],
459    ) {
460        let mut image = map_surface_nv12(display, surface);
461
462        let va_image = *image.image();
463        let dest = image.as_mut();
464        let width = width as usize;
465        let height = height as usize;
466
467        let mut src: &[u8] = data;
468        let mut dst = &mut dest[va_image.offsets[0] as usize..];
469
470        // Copy luma
471        for _ in 0..height {
472            dst[..width].copy_from_slice(&src[..width]);
473            dst = &mut dst[va_image.pitches[0] as usize..];
474            src = &src[width..];
475        }
476
477        // Advance to the offset of the chroma plane
478        let mut src = &data[width * height..];
479        let mut dst = &mut dest[va_image.offsets[1] as usize..];
480
481        let height = height / 2;
482
483        // Copy chroma
484        for _ in 0..height {
485            dst[..width].copy_from_slice(&src[..width]);
486            dst = &mut dst[va_image.pitches[1] as usize..];
487            src = &src[width..];
488        }
489
490        surface.sync().unwrap();
491        drop(image);
492    }
493
494    /// Helper struct. [`Iterator`] to fetch frames from [`SurfacePool`].
495    pub struct PooledFrameIterator {
496        counter: u64,
497        display: Rc<Display>,
498        pool: VaSurfacePool<()>,
499        frame_layout: FrameLayout,
500    }
501
502    impl PooledFrameIterator {
503        pub fn new(
504            display: Rc<Display>,
505            pool: VaSurfacePool<()>,
506            frame_layout: FrameLayout,
507        ) -> Self {
508            Self {
509                counter: 0,
510                display,
511                pool,
512                frame_layout,
513            }
514        }
515    }
516
517    impl Iterator for PooledFrameIterator {
518        type Item = (FrameMetadata, PooledVaSurface<()>);
519
520        fn next(&mut self) -> Option<Self::Item> {
521            let handle = self.pool.get_surface().unwrap();
522
523            let meta = FrameMetadata {
524                layout: self.frame_layout.clone(),
525                force_keyframe: false,
526                timestamp: self.counter,
527            };
528
529            self.counter += 1;
530
531            Some((meta, handle))
532        }
533    }
534
535    /// Helper struct. Uses [`Iterator`] with raw chunks and uploads to pooled surface from
536    /// [`SurfacePool`] to produce frames.
537    pub struct NV12FrameProducer<'l, I>
538    where
539        I: Iterator<Item = &'l [u8]>,
540    {
541        raw_iterator: I,
542        pool_iter: PooledFrameIterator,
543    }
544
545    impl<'l, I> NV12FrameProducer<'l, I>
546    where
547        I: Iterator<Item = &'l [u8]>,
548    {
549        #[allow(dead_code)]
550        pub fn new(
551            raw_iterator: I,
552            display: Rc<Display>,
553            pool: VaSurfacePool<()>,
554            frame_layout: FrameLayout,
555        ) -> Self {
556            Self {
557                raw_iterator,
558                pool_iter: PooledFrameIterator::new(display, pool, frame_layout),
559            }
560        }
561    }
562
563    impl<'l, I> Iterator for NV12FrameProducer<'l, I>
564    where
565        I: Iterator<Item = &'l [u8]>,
566    {
567        type Item = (FrameMetadata, PooledVaSurface<()>);
568
569        fn next(&mut self) -> Option<Self::Item> {
570            let raw = match self.raw_iterator.next() {
571                Some(raw) => raw,
572                None => return None,
573            };
574
575            let (meta, handle) = self.pool_iter.next().unwrap();
576
577            let width = meta.layout.size.width;
578            let height = meta.layout.size.height;
579            debug_assert_eq!((width * height + width * height / 2) as usize, raw.len());
580
581            upload_nv12_img(&self.pool_iter.display, handle.borrow(), width, height, raw);
582
583            Some((meta, handle))
584        }
585    }
586
587    pub fn upload_test_frame_nv12<M: SurfaceMemoryDescriptor>(
588        display: &Rc<Display>,
589        surface: &Surface<M>,
590        t: f32,
591    ) {
592        let mut image = map_surface_nv12(display, surface);
593
594        let (width, height) = image.display_resolution();
595
596        let offsets = image.image().offsets;
597        let pitches = image.image().pitches;
598
599        fill_test_frame_nv12(
600            width as usize,
601            height as usize,
602            [pitches[0] as usize, pitches[1] as usize],
603            [offsets[0] as usize, offsets[1] as usize],
604            t,
605            image.as_mut(),
606        );
607
608        drop(image);
609        surface.sync().unwrap();
610    }
611
612    pub fn upload_test_frame_p010<M: SurfaceMemoryDescriptor>(
613        display: &Rc<Display>,
614        surface: &Surface<M>,
615        t: f32,
616    ) {
617        let mut image = map_surface_p010(display, surface);
618
619        let (width, height) = image.display_resolution();
620
621        let offsets = image.image().offsets;
622        let pitches = image.image().pitches;
623
624        fill_test_frame_p010(
625            width as usize,
626            height as usize,
627            [pitches[0] as usize, pitches[1] as usize],
628            [offsets[0] as usize, offsets[1] as usize],
629            t,
630            image.as_mut(),
631        );
632
633        drop(image);
634        surface.sync().unwrap();
635    }
636
637    /// Helper struct. Procedurally generate NV12 frames for test purposes.
638    pub struct TestFrameGenerator {
639        counter: u64,
640        max_count: u64,
641        pool_iter: PooledFrameIterator,
642        display: Rc<Display>,
643        fourcc: Fourcc,
644    }
645
646    impl TestFrameGenerator {
647        pub fn new(
648            max_count: u64,
649            display: Rc<Display>,
650            pool: VaSurfacePool<()>,
651            frame_layout: FrameLayout,
652        ) -> Self {
653            Self {
654                counter: 0,
655                max_count,
656                fourcc: frame_layout.format.0,
657                pool_iter: PooledFrameIterator::new(display.clone(), pool, frame_layout),
658                display,
659            }
660        }
661    }
662
663    impl Iterator for TestFrameGenerator {
664        type Item = (FrameMetadata, PooledVaSurface<()>);
665
666        fn next(&mut self) -> Option<Self::Item> {
667            if self.counter > self.max_count {
668                return None;
669            }
670
671            self.counter += 1;
672
673            let (meta, handle) = self.pool_iter.next().unwrap();
674
675            let surface: &Surface<()> = handle.borrow();
676
677            let t = get_test_frame_t(meta.timestamp, self.max_count);
678            match self.fourcc.0 {
679                VA_FOURCC_NV12 => upload_test_frame_nv12(&self.display, surface, t),
680                VA_FOURCC_P010 => upload_test_frame_p010(&self.display, surface, t),
681                _ => unreachable!(),
682            }
683
684            Some((meta, handle))
685        }
686    }
687}