cros_codecs/backend/
vaapi.rs

1// Copyright 2022 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! VAAPI backend for both stateless decoders and encoders.
6
7use std::collections::HashSet;
8use std::fmt::Debug;
9use std::os::fd::AsRawFd;
10
11use anyhow::anyhow;
12use byteorder::ByteOrder;
13use byteorder::LittleEndian;
14use libva::Display;
15use libva::VAConfigAttrib;
16use libva::VAConfigAttribType;
17
18use crate::utils::DmabufFrame;
19use crate::utils::UserPtrFrame;
20use crate::DecodedFormat;
21
22pub mod decoder;
23pub mod encoder;
24pub mod surface_pool;
25
26fn va_rt_format_to_string(va_rt_format: u32) -> String {
27    String::from(match va_rt_format {
28        libva::constants::VA_RT_FORMAT_YUV420 => "YUV420",
29        libva::constants::VA_RT_FORMAT_YUV422 => "YUV422",
30        libva::constants::VA_RT_FORMAT_YUV444 => "YUV444",
31        libva::constants::VA_RT_FORMAT_YUV420_10 => "YUV420_10",
32        libva::constants::VA_RT_FORMAT_YUV420_12 => "YUV420_12",
33        libva::constants::VA_RT_FORMAT_YUV422_10 => "YUV422_10",
34        libva::constants::VA_RT_FORMAT_YUV422_12 => "YUV422_12",
35        libva::constants::VA_RT_FORMAT_YUV444_10 => "YUV444_10",
36        libva::constants::VA_RT_FORMAT_YUV444_12 => "YUV444_12",
37        other => return format!("unknown VA rt_format {}", other),
38    })
39}
40
41#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
42struct FormatMap {
43    pub rt_format: u32,
44    pub va_fourcc: u32,
45    pub decoded_format: DecodedFormat,
46}
47
48/// Maps a given VA_RT_FORMAT to a compatible decoded format in an arbitrary
49/// preferred order.
50const FORMAT_MAP: [FormatMap; 10] = [
51    FormatMap {
52        rt_format: libva::constants::VA_RT_FORMAT_YUV420,
53        va_fourcc: libva::constants::VA_FOURCC_NV12,
54        decoded_format: DecodedFormat::NV12,
55    },
56    FormatMap {
57        rt_format: libva::constants::VA_RT_FORMAT_YUV420,
58        va_fourcc: libva::constants::VA_FOURCC_I420,
59        decoded_format: DecodedFormat::I420,
60    },
61    FormatMap {
62        rt_format: libva::constants::VA_RT_FORMAT_YUV422,
63        va_fourcc: libva::constants::VA_FOURCC_422H,
64        decoded_format: DecodedFormat::I422,
65    },
66    FormatMap {
67        rt_format: libva::constants::VA_RT_FORMAT_YUV444,
68        va_fourcc: libva::constants::VA_FOURCC_444P,
69        decoded_format: DecodedFormat::I444,
70    },
71    FormatMap {
72        rt_format: libva::constants::VA_RT_FORMAT_YUV420_10,
73        va_fourcc: libva::constants::VA_FOURCC_P010,
74        decoded_format: DecodedFormat::I010,
75    },
76    FormatMap {
77        rt_format: libva::constants::VA_RT_FORMAT_YUV420_12,
78        va_fourcc: libva::constants::VA_FOURCC_P012,
79        decoded_format: DecodedFormat::I012,
80    },
81    FormatMap {
82        rt_format: libva::constants::VA_RT_FORMAT_YUV422_10,
83        va_fourcc: libva::constants::VA_FOURCC_Y210,
84        decoded_format: DecodedFormat::I210,
85    },
86    FormatMap {
87        rt_format: libva::constants::VA_RT_FORMAT_YUV422_12,
88        va_fourcc: libva::constants::VA_FOURCC_Y212,
89        decoded_format: DecodedFormat::I212,
90    },
91    FormatMap {
92        rt_format: libva::constants::VA_RT_FORMAT_YUV444_10,
93        va_fourcc: libva::constants::VA_FOURCC_Y410,
94        decoded_format: DecodedFormat::I410,
95    },
96    FormatMap {
97        rt_format: libva::constants::VA_RT_FORMAT_YUV444_12,
98        va_fourcc: libva::constants::VA_FOURCC_Y412,
99        decoded_format: DecodedFormat::I412,
100    },
101];
102
103/// Returns a set of supported decoded formats given `rt_format`
104fn supported_formats_for_rt_format(
105    display: &Display,
106    rt_format: u32,
107    profile: i32,
108    entrypoint: u32,
109    image_formats: &[libva::VAImageFormat],
110) -> anyhow::Result<HashSet<FormatMap>> {
111    let mut attrs = vec![VAConfigAttrib {
112        type_: VAConfigAttribType::VAConfigAttribRTFormat,
113        value: 0,
114    }];
115
116    display.get_config_attributes(profile, entrypoint, &mut attrs)?;
117
118    // See whether this RT_FORMAT is supported by the given VAProfile and
119    // VAEntrypoint pair.
120    if attrs[0].value == libva::constants::VA_ATTRIB_NOT_SUPPORTED
121        || attrs[0].value & rt_format == 0
122    {
123        return Err(anyhow!(
124            "rt_format {:?} not supported for profile {:?} and entrypoint {:?}",
125            rt_format,
126            profile,
127            entrypoint
128        ));
129    }
130
131    let mut supported_formats = HashSet::new();
132
133    for format in FORMAT_MAP {
134        if format.rt_format == rt_format {
135            supported_formats.insert(format);
136        }
137    }
138
139    // Only retain those that the hardware can actually map into.
140    supported_formats.retain(|&entry| {
141        image_formats
142            .iter()
143            .any(|fmt| fmt.fourcc == entry.va_fourcc)
144    });
145
146    Ok(supported_formats)
147}
148
149impl TryFrom<&libva::VAImageFormat> for DecodedFormat {
150    type Error = anyhow::Error;
151
152    fn try_from(value: &libva::VAImageFormat) -> Result<Self, Self::Error> {
153        match value.fourcc {
154            libva::constants::VA_FOURCC_I420 => Ok(DecodedFormat::I420),
155            libva::constants::VA_FOURCC_NV12 => Ok(DecodedFormat::NV12),
156            libva::constants::VA_FOURCC_P010 => Ok(DecodedFormat::I010),
157            libva::constants::VA_FOURCC_P012 => Ok(DecodedFormat::I012),
158            libva::constants::VA_FOURCC_Y210 => Ok(DecodedFormat::I210),
159            libva::constants::VA_FOURCC_Y212 => Ok(DecodedFormat::I212),
160            libva::constants::VA_FOURCC_Y410 => Ok(DecodedFormat::I410),
161            libva::constants::VA_FOURCC_Y412 => Ok(DecodedFormat::I412),
162            _ => Err(anyhow!("Unsupported format")),
163        }
164    }
165}
166
167/// Copies `src` into `dst` removing all padding and converting from biplanar to triplanar format.
168///
169/// `useful_pixels` is the number of useful pixels in each sample, e.g. `10` for `P010`, `12` for
170/// `P012`, etc.
171///
172/// This function is VAAPI-specific because of the unusual the source pixels are laid out: VAAPI
173/// writes the `useful_pixels` MSBs, but software generally expects the LSBs to contain the data.
174fn p01x_to_i01x(
175    src: &[u8],
176    dst: &mut [u8],
177    useful_pixels: usize,
178    width: usize,
179    height: usize,
180    strides: [usize; 3],
181    offsets: [usize; 3],
182) {
183    let sample_shift = 16 - useful_pixels;
184
185    // Copy Y.
186    //
187    // VAAPI's Y samples are two byte little endian with the bottom six bits ignored. We need to
188    // convert that to two byte little endian with top 6 bits ignored.
189
190    let src_y_lines = src[offsets[0]..]
191        .chunks(strides[0])
192        .map(|line| &line[..width * 2]);
193    let dst_y_lines = dst.chunks_mut(width * 2);
194
195    for (src_line, dst_line) in src_y_lines.zip(dst_y_lines).take(height) {
196        for (src_y, dst_y) in src_line.chunks(2).zip(dst_line.chunks_mut(2)) {
197            LittleEndian::write_u16(dst_y, LittleEndian::read_u16(src_y) >> sample_shift);
198        }
199    }
200
201    let dst_u_offset = width * 2 * height;
202
203    // Align width and height to 2 for UV plane.
204    let width = if width % 2 == 1 { width + 1 } else { width };
205    let height = if height % 2 == 1 { height + 1 } else { height };
206    // 1 sample per 4 pixels, but we have two components per line so width remains as-is.
207    let height = height / 2;
208
209    let dst_u_size = width * height;
210
211    // Copy U and V and deinterleave into different planes.
212    //
213    // We need to perform the same bit shift as luma, but also to de-interleave the data.
214    let src_uv_lines = src[offsets[1]..]
215        .chunks(strides[1])
216        .map(|line| &line[..width * 2]);
217    let (dst_u_plane, dst_v_plane) = dst[dst_u_offset..].split_at_mut(dst_u_size);
218    let dst_u_lines = dst_u_plane.chunks_mut(width);
219    let dst_v_lines = dst_v_plane.chunks_mut(width);
220    for (src_line, (dst_u_line, dst_v_line)) in
221        src_uv_lines.zip(dst_u_lines.zip(dst_v_lines)).take(height)
222    {
223        for ((src_u, src_v), (dst_u, dst_v)) in src_line
224            .chunks(4)
225            .map(|chunk| (&chunk[0..2], &chunk[2..4]))
226            .zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2)))
227        {
228            LittleEndian::write_u16(dst_u, LittleEndian::read_u16(src_u) >> sample_shift);
229            LittleEndian::write_u16(dst_v, LittleEndian::read_u16(src_v) >> sample_shift);
230        }
231    }
232}
233
234/// Copies `src` into `dst` as I21x, removing all padding and changing the layout from packed to
235/// triplanar.
236///
237/// `useful_pixels` is the number of useful pixels in each sample, e.g. `10` for `Y210` or `16` for
238/// `Y216`.
239///
240/// This function is VAAPI-specific because of the unusual the source pixels are laid out: VAAPI
241/// writes the `useful_pixels` MSBs, but software generally expects the LSBs to contain the data.
242///
243/// WARNING: this function could not be tested for lack of supporting hardware.
244fn y21x_to_i21x(
245    src: &[u8],
246    dst: &mut [u8],
247    useful_pixels: usize,
248    width: usize,
249    height: usize,
250    strides: [usize; 3],
251    offsets: [usize; 3],
252) {
253    let sample_shift = 16 - useful_pixels;
254    // Align width to 2 for U and V planes and divide by 2.
255    // This should not be necessary as the sampling method requires that width is a multiple of 2
256    // to begin with.
257    let uv_width = if width % 2 == 1 { width + 1 } else { width } / 2;
258
259    // YUYV representation, i.e. 4 16-bit words per two Y samples meaning we have 4 * width bytes
260    // of data per line.
261    let src_lines = src[offsets[0]..]
262        .chunks(strides[0])
263        .map(|line| &line[..width * 4]);
264
265    let dst_y_size = width * 2 * height;
266    let dst_u_size = uv_width * 2 * height;
267
268    let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
269    let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
270    let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
271    let dst_u_lines = dst_u_plane.chunks_mut(uv_width * 2);
272    let dst_v_lines = dst_v_plane.chunks_mut(uv_width * 2);
273
274    for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in src_lines
275        .zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines)))
276        .take(height)
277    {
278        for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(8).zip(
279            dst_y_line
280                .chunks_mut(4)
281                .zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
282        ) {
283            let y0 = LittleEndian::read_u16(&src[0..2]) >> sample_shift;
284            let u = LittleEndian::read_u16(&src[2..4]) >> sample_shift;
285            let y1 = LittleEndian::read_u16(&src[4..6]) >> sample_shift;
286            let v = LittleEndian::read_u16(&src[6..8]) >> sample_shift;
287
288            LittleEndian::write_u16(&mut dst_y[0..2], y0);
289            LittleEndian::write_u16(&mut dst_y[2..4], y1);
290            LittleEndian::write_u16(dst_u, u);
291            LittleEndian::write_u16(dst_v, v);
292        }
293    }
294}
295
296/// Copies `src` into `dst` as I412, removing all padding and changing the layout from packed to
297/// triplanar. Also drops the alpha channel.
298///
299/// This function is VAAPI-specific because the samples need to be rolled somehow...
300fn y412_to_i412(
301    src: &[u8],
302    dst: &mut [u8],
303    width: usize,
304    height: usize,
305    strides: [usize; 3],
306    offsets: [usize; 3],
307) {
308    let src_lines = src[offsets[0]..]
309        .chunks(strides[0])
310        .map(|line| &line[..width * 8]);
311
312    let dst_y_size = width * 2 * height;
313    let dst_u_size = width * 2 * height;
314
315    let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
316    let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
317    let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
318    let dst_u_lines = dst_u_plane.chunks_mut(width * 2);
319    let dst_v_lines = dst_v_plane.chunks_mut(width * 2);
320
321    for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in src_lines
322        .zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines)))
323        .take(height)
324    {
325        for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(8).zip(
326            dst_y_line
327                .chunks_mut(2)
328                .zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
329        ) {
330            let y = LittleEndian::read_u16(&src[2..4]);
331            let u = LittleEndian::read_u16(&src[0..2]);
332            let v = LittleEndian::read_u16(&src[4..6]);
333            // Why is that rotate_right neeed??
334            LittleEndian::write_u16(dst_y, y.rotate_right(4));
335            LittleEndian::write_u16(dst_u, u.rotate_right(4));
336            LittleEndian::write_u16(dst_v, v.rotate_right(4));
337        }
338    }
339}
340
341impl libva::ExternalBufferDescriptor for UserPtrFrame {
342    const MEMORY_TYPE: libva::MemoryType = libva::MemoryType::UserPtr;
343    type DescriptorAttribute = libva::VASurfaceAttribExternalBuffers;
344
345    fn va_surface_attribute(&mut self) -> Self::DescriptorAttribute {
346        let pitches = self
347            .layout
348            .planes
349            .iter()
350            .map(|p| p.stride as u32)
351            .chain(std::iter::repeat(0))
352            .take(4)
353            .collect::<Vec<_>>()
354            .try_into()
355            .unwrap();
356        let offsets = self
357            .layout
358            .planes
359            .iter()
360            .map(|p| p.offset as u32)
361            .chain(std::iter::repeat(0))
362            .take(4)
363            .collect::<Vec<_>>()
364            .try_into()
365            .unwrap();
366
367        libva::VASurfaceAttribExternalBuffers {
368            pixel_format: self.layout.format.0.into(),
369            width: self.layout.size.width,
370            height: self.layout.size.height,
371            data_size: self.mem_layout.size() as u32,
372            num_planes: self.layout.planes.len() as u32,
373            pitches,
374            offsets,
375            buffers: self.buffers.as_mut_ptr() as *mut _,
376            num_buffers: self.buffers.len() as u32,
377            flags: 0,
378            private_data: std::ptr::null_mut(),
379        }
380    }
381}
382
383impl libva::ExternalBufferDescriptor for DmabufFrame {
384    const MEMORY_TYPE: libva::MemoryType = libva::MemoryType::DrmPrime2;
385    type DescriptorAttribute = libva::VADRMPRIMESurfaceDescriptor;
386
387    fn va_surface_attribute(&mut self) -> Self::DescriptorAttribute {
388        let objects = self
389            .fds
390            .iter()
391            .map(|fd| libva::VADRMPRIMESurfaceDescriptorObject {
392                fd: fd.as_raw_fd(),
393                size: nix::sys::stat::fstat(fd.as_raw_fd())
394                    .map(|stat| stat.st_size as u32)
395                    // If we don't have the information about the plane fd size, fallback to 0.
396                    // Libva seems to be *sometimes* "happy" with zero.
397                    .unwrap_or(0),
398                // TODO should the descriptor be moved to individual objects?
399                drm_format_modifier: self.layout.format.1,
400            })
401            .chain(std::iter::repeat(Default::default()))
402            .take(4)
403            .collect::<Vec<_>>()
404            .try_into()
405            .unwrap();
406
407        let layers = [
408            libva::VADRMPRIMESurfaceDescriptorLayer {
409                drm_format: self.layout.format.0.into(),
410                num_planes: self.layout.planes.len() as u32,
411                object_index: [0, 0, 0, 0],
412                offset: self
413                    .layout
414                    .planes
415                    .iter()
416                    .map(|p| p.offset as u32)
417                    .chain(std::iter::repeat(0))
418                    .take(4)
419                    .collect::<Vec<_>>()
420                    .try_into()
421                    .unwrap(),
422                pitch: self
423                    .layout
424                    .planes
425                    .iter()
426                    .map(|p| p.stride as u32)
427                    .chain(std::iter::repeat(0))
428                    .take(4)
429                    .collect::<Vec<_>>()
430                    .try_into()
431                    .unwrap(),
432            },
433            Default::default(),
434            Default::default(),
435            Default::default(),
436        ];
437
438        libva::VADRMPRIMESurfaceDescriptor {
439            // TODO should we match and use VA_FOURCC_* here?
440            fourcc: self.layout.format.0.into(),
441            width: self.layout.size.width,
442            height: self.layout.size.height,
443            num_objects: 1,
444            objects,
445            num_layers: 1,
446            layers,
447        }
448    }
449}