// cros_codecs: decoder/stateless/av1.rs

1// Copyright 2023 The ChromiumOS Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std::os::fd::AsFd;
6use std::os::fd::BorrowedFd;
7use std::rc::Rc;
8
9use anyhow::anyhow;
10
11use crate::codec::av1::parser::FrameHeaderObu;
12use crate::codec::av1::parser::FrameObu;
13use crate::codec::av1::parser::FrameType;
14use crate::codec::av1::parser::ObuType;
15use crate::codec::av1::parser::ParsedObu;
16use crate::codec::av1::parser::Parser;
17use crate::codec::av1::parser::SequenceHeaderObu;
18use crate::codec::av1::parser::TileGroupObu;
19use crate::codec::av1::parser::NUM_REF_FRAMES;
20use crate::decoder::stateless::DecodeError;
21use crate::decoder::stateless::DecodingState;
22use crate::decoder::stateless::NewPictureResult;
23use crate::decoder::stateless::StatelessBackendResult;
24use crate::decoder::stateless::StatelessCodec;
25use crate::decoder::stateless::StatelessDecoder;
26use crate::decoder::stateless::StatelessDecoderBackend;
27use crate::decoder::stateless::StatelessDecoderBackendPicture;
28use crate::decoder::stateless::StatelessVideoDecoder;
29use crate::decoder::stateless::TryFormat;
30use crate::decoder::BlockingMode;
31use crate::decoder::DecodedHandle;
32use crate::decoder::PoolLayer;
33use crate::Resolution;
34
35#[cfg(test)]
36mod dummy;
37#[cfg(feature = "vaapi")]
38mod vaapi;
39
40/// Stateless backend methods specific to AV1.
41pub trait StatelessAV1DecoderBackend:
42    StatelessDecoderBackend + StatelessDecoderBackendPicture<Av1>
43{
44    /// Called when a new Sequence Header OBU is parsed. The
45    /// `highest_spatial_layer` argument refers to the maximum layer selected by
46    /// the client through `set_operating_point()` and the scalability
47    /// information present in the stream, if any.
48    fn new_sequence(
49        &mut self,
50        sequence: &Rc<SequenceHeaderObu>,
51        highest_spatial_layer: Option<u32>,
52    ) -> StatelessBackendResult<()>;
53
54    /// Called when the decoder determines that a new picture was found. The backend allocates all
55    /// the resources it needs to process that picture.
56    fn new_picture(
57        &mut self,
58        hdr: &FrameHeaderObu,
59        timestamp: u64,
60        highest_spatial_layer: Option<u32>,
61    ) -> NewPictureResult<Self::Picture>;
62
63    /// Called to set the global parameters of a picture.
64    fn begin_picture(
65        &mut self,
66        picture: &mut Self::Picture,
67        sequence: &SequenceHeaderObu,
68        hdr: &FrameHeaderObu,
69        reference_frames: &[Option<Self::Handle>; NUM_REF_FRAMES],
70    ) -> StatelessBackendResult<()>;
71
72    /// Called to dispatch a decode operation to the backend.
73    #[allow(clippy::too_many_arguments)]
74    fn decode_tile_group(
75        &mut self,
76        picture: &mut Self::Picture,
77        tile_group: TileGroupObu,
78    ) -> StatelessBackendResult<()>;
79
80    /// Called when the decoder wants the backend to finish the decoding
81    /// operations for `picture`. At this point, `decode_tile` has been called
82    /// for all tiles.
83    fn submit_picture(&mut self, picture: Self::Picture) -> StatelessBackendResult<Self::Handle>;
84}
85
/// State of the picture being currently decoded.
///
/// Stored between calls to [`StatelessDecoder::decode_tile_group`] that belong to the same
/// picture.
enum CurrentPicState<H: DecodedHandle, P> {
    /// A regular frame, decoded tile group by tile group.
    RegularFrame {
        /// Data for the current picture as extracted from the stream.
        header: FrameHeaderObu,
        /// Backend-specific data for that picture.
        backend_picture: P,
    },

    /// A frame that has 'show_existing_frame' set: nothing is decoded for it,
    /// an already-decoded reference frame is re-output instead.
    ShowExistingFrame {
        /// Data for the current picture as extracted from the stream.
        header: FrameHeaderObu,
        /// The handle of the reference frame that this frame points to.
        handle: H,
    },
}
107
/// Codec-specific decoder state for AV1, preserved across calls to `decode`
/// (used as [`StatelessCodec::DecoderState`] for [`Av1`]).
pub struct AV1DecoderState<H: DecodedHandle, P> {
    /// AV1 bitstream parser.
    parser: Parser,

    /// The reference frames in use, indexed by reference slot.
    reference_frames: [Option<H>; NUM_REF_FRAMES],

    /// Keeps track of the last values seen for negotiation purposes.
    sequence: Option<Rc<SequenceHeaderObu>>,

    /// The picture currently being decoded. We need to preserve it between
    /// calls to `decode` because multiple tiles will be processed in different
    /// calls to `decode`.
    current_pic: Option<CurrentPicState<H, P>>,

    /// Keep track of the number of frames we've processed for logging purposes.
    frame_count: u32,

    /// For SVC streams, we only want to output the highest layer possible given
    /// the choice of operating point.
    highest_spatial_layer: Option<u32>,
}
130
131impl<H, P> Default for AV1DecoderState<H, P>
132where
133    H: DecodedHandle,
134{
135    fn default() -> Self {
136        Self {
137            parser: Default::default(),
138            reference_frames: Default::default(),
139            sequence: Default::default(),
140            current_pic: Default::default(),
141            frame_count: Default::default(),
142            highest_spatial_layer: Default::default(),
143        }
144    }
145}
146
/// [`StatelessCodec`] structure to use in order to create an AV1 stateless decoder.
///
/// # Accepted input
///
/// The input should initially be submitted as a whole temporal unit. A decoder using this codec
/// only consumes a single OBU per call to [`StatelessDecoder::decode`] and returns the number of
/// bytes processed, so the caller must resubmit the remainder until the whole input is consumed.
pub struct Av1;
155
impl StatelessCodec for Av1 {
    /// Format negotiation is keyed on the last sequence header seen.
    type FormatInfo = Rc<SequenceHeaderObu>;
    type DecoderState<H: DecodedHandle, P> = AV1DecoderState<H, P>;
}
160
161impl<B> StatelessDecoder<Av1, B>
162where
163    B: StatelessAV1DecoderBackend,
164    B::Handle: Clone,
165{
166    fn decode_frame_header(
167        &mut self,
168        frame_header: FrameHeaderObu,
169        timestamp: u64,
170    ) -> Result<(), DecodeError> {
171        log::debug!(
172            "Processing frame {} with timestamp {}",
173            self.codec.frame_count,
174            timestamp
175        );
176
177        if frame_header.show_existing_frame {
178            let ref_frame = self.codec.reference_frames
179                [frame_header.frame_to_show_map_idx as usize]
180                .as_ref()
181                .ok_or(anyhow!("Broken stream: no reference picture to display"))?;
182            self.codec.current_pic = Some(CurrentPicState::ShowExistingFrame {
183                header: frame_header,
184                handle: ref_frame.clone(),
185            });
186        } else if let Some(sequence) = &self.codec.sequence {
187            let mut backend_picture = self.backend.new_picture(
188                &frame_header,
189                timestamp,
190                self.codec.highest_spatial_layer,
191            )?;
192
193            self.backend.begin_picture(
194                &mut backend_picture,
195                sequence,
196                &frame_header,
197                &self.codec.reference_frames,
198            )?;
199
200            self.codec.current_pic = Some(CurrentPicState::RegularFrame {
201                header: frame_header.clone(),
202                backend_picture,
203            });
204        } else {
205            log::warn!("invalid stream: frame header received while no valid sequence ongoing");
206        }
207
208        Ok(())
209    }
210
211    fn decode_tile_group(&mut self, tile_group: TileGroupObu) -> anyhow::Result<()> {
212        let picture = match self.codec.current_pic.as_mut() {
213            Some(CurrentPicState::RegularFrame {
214                backend_picture, ..
215            }) => backend_picture,
216            Some(CurrentPicState::ShowExistingFrame { .. }) => {
217                return Err(anyhow!("Broken stream: cannot decode a tile group for a frame with show_existing_frame set"));
218            }
219            None => {
220                return Err(anyhow!(
221                "Broken stream: cannot decode a tile group without first decoding a frame header"
222            ))
223            }
224        };
225
226        self.backend.decode_tile_group(picture, tile_group)?;
227        Ok(())
228    }
229
230    fn decode_frame(&mut self, frame: FrameObu, timestamp: u64) -> Result<(), DecodeError> {
231        let FrameObu { header, tile_group } = frame;
232        self.decode_frame_header(header, timestamp)?;
233        self.decode_tile_group(tile_group)?;
234        Ok(())
235    }
236
237    fn submit_frame(&mut self, timestamp: u64) -> anyhow::Result<()> {
238        log::debug!(
239            "Finishing frame {} with timestamp: {}",
240            self.codec.frame_count,
241            timestamp
242        );
243
244        let picture = self.codec.current_pic.take();
245
246        let (handle, header) = match picture {
247            Some(CurrentPicState::RegularFrame {
248                header,
249                backend_picture,
250            }) => {
251                let handle = self.backend.submit_picture(backend_picture)?;
252
253                if self.blocking_mode == BlockingMode::Blocking {
254                    handle.sync()?;
255                }
256                (handle, header)
257            }
258            Some(CurrentPicState::ShowExistingFrame { header, handle }) => (handle, header),
259            None => return Err(anyhow!("Broken stream: no picture to submit")),
260        };
261
262        let update_refs = if header.show_existing_frame {
263            header.frame_type == FrameType::KeyFrame
264        } else {
265            true
266        };
267
268        if update_refs {
269            let mut refresh_frame_flags = header.refresh_frame_flags;
270
271            #[allow(clippy::needless_range_loop)]
272            for i in 0..NUM_REF_FRAMES {
273                if (refresh_frame_flags & 1) == 1 {
274                    log::debug!(
275                        "Replacing reference frame {} to new timestamp {} on frame count: {}",
276                        i,
277                        timestamp,
278                        self.codec.frame_count
279                    );
280                    self.codec.reference_frames[i] = Some(handle.clone());
281                }
282
283                refresh_frame_flags >>= 1;
284            }
285        }
286
287        let show_existing_frame = header.show_existing_frame;
288        if header.show_frame || show_existing_frame {
289            match self.codec.highest_spatial_layer {
290                None => self.ready_queue.push(handle),
291                Some(highest_spatial_layer) => {
292                    if header.obu_header.spatial_id >= highest_spatial_layer {
293                        self.ready_queue.push(handle);
294                    } else {
295                        log::debug!(
296                            "Dropping frame with spatial_id {}",
297                            header.obu_header.spatial_id
298                        );
299                    }
300                }
301            }
302        }
303
304        self.codec.parser.ref_frame_update(&header)?;
305        self.codec.frame_count += 1;
306        Ok(())
307    }
308}
309
impl<B> StatelessVideoDecoder for StatelessDecoder<Av1, B>
where
    B: StatelessAV1DecoderBackend + TryFormat<Av1>,
    B::Handle: Clone + 'static,
{
    type Handle = B::Handle;
    type FramePool = B::FramePool;

    /// Decode an AV1 stream.
    ///
    /// `bitstream` should initially be submitted as a whole temporal unit, however a call to this
    /// method will only consume a single OBU. The caller must be careful to check the return value
    /// and resubmit the remainder if the whole bitstream has not been consumed.
    fn decode(&mut self, timestamp: u64, bitstream: &[u8]) -> Result<usize, DecodeError> {
        let obu = match self.codec.parser.parse_obu(bitstream)? {
            ParsedObu::Process(obu) => obu,
            // This OBU should be dropped.
            ParsedObu::Drop(length) => return Ok(length as usize),
        };
        let obu_length = obu.data.len();

        // OBU types that actually decode pixel data and thus require the
        // decoder to be in `Decoding` state.
        let is_decode_op = matches!(
            obu.header.obu_type,
            ObuType::Frame | ObuType::FrameHeader | ObuType::TileGroup
        );

        if is_decode_op {
            match self.decoding_state {
                /* we want to be here */
                DecodingState::Decoding => (),

                /* otherwise... */
                DecodingState::AwaitingStreamInfo => {
                    /* Skip input until we get information from the stream. */
                    return Ok(obu_length);
                }
                /* Ask the client to confirm the format before we can process this. */
                DecodingState::AwaitingFormat(_) => return Err(DecodeError::CheckEvents),
                DecodingState::Reset => {
                    /* Probe the frame type on a clone of the parser so the real
                     * parser state is not advanced for OBUs we may skip. */
                    let mut parser = self.codec.parser.clone();

                    let is_key_frame = match obu.header.obu_type {
                        ObuType::Frame => {
                            let frame = parser.parse_frame_obu(obu.clone())?;
                            frame.header.frame_type == FrameType::KeyFrame
                        }
                        ObuType::FrameHeader => {
                            let fh = parser.parse_frame_header_obu(&obu)?;
                            fh.frame_type == FrameType::KeyFrame
                        }
                        _ => false,
                    };

                    /* we can only resume from key frames */
                    if !is_key_frame {
                        return Ok(obu_length);
                    } else {
                        self.decoding_state = DecodingState::Decoding;
                    }
                }
            }
        }

        /* We are in `Decoding` state if we reached here */

        match obu.header.obu_type {
            ObuType::SequenceHeader => {
                let sequence = self.codec.parser.parse_sequence_header_obu(&obu)?;
                // Renegotiation is only needed for the first sequence or when
                // the sequence parameters actually changed.
                let sequence_differs = match &self.codec.sequence {
                    Some(old_sequence) => **old_sequence != *sequence,
                    None => true,
                };

                if matches!(self.decoding_state, DecodingState::AwaitingStreamInfo)
                    || sequence_differs
                {
                    if self.codec.current_pic.is_some() {
                        return Err(DecodeError::DecoderError(anyhow!(
                                "broken stream: a picture is being decoded while a new sequence header is encountered"
                            )));
                    }

                    /* make sure we sync *before* we clear any state in the backend */
                    for f in &mut self.ready_queue.queue {
                        /* TODO: this fixes av1-1-b8-03-sizeup on Intel
                         * gen12, but we apparently do not do the same in
                         * VP9. How is it that we do not get similar crashes there?
                         *
                         * TODO: syncing before calling new_sequence() in VP9 may fix some tests
                         */
                        f.sync()?;
                    }

                    log::debug!(
                        "found new sequence, resolution: {:?}, profile: {:?}, bit depth: {:?}",
                        Resolution::from((
                            sequence.max_frame_width_minus_1 as u32 + 1,
                            sequence.max_frame_height_minus_1 as u32 + 1
                        )),
                        sequence.seq_profile,
                        sequence.bit_depth
                    );
                    /* there is nothing to drain, much like vp8 and vp9 */
                    self.codec.highest_spatial_layer = self.codec.parser.highest_operating_point();
                    self.backend
                        .new_sequence(&sequence, self.codec.highest_spatial_layer)?;
                    // Pause decoding until the client confirms the new format.
                    self.await_format_change(sequence);
                }
            }
            ObuType::TemporalDelimiter => self.codec.parser.parse_temporal_delimiter_obu(&obu)?,
            ObuType::FrameHeader => {
                if self.codec.current_pic.is_some() {
                    /* submit this frame immediately, as we need to update the
                     * DPB and the reference info state *before* processing the
                     * next frame */
                    self.submit_frame(timestamp)?;
                }
                let frame_header = self.codec.parser.parse_frame_header_obu(&obu)?;
                self.decode_frame_header(frame_header, timestamp)?;
            }
            ObuType::TileGroup => {
                let tile_group = self.codec.parser.parse_tile_group_obu(obu)?;
                self.decode_tile_group(tile_group)?;
            }
            ObuType::Frame => {
                let frame = self.codec.parser.parse_frame_obu(obu)?;
                self.decode_frame(frame, timestamp)?;
                /* submit this frame immediately, as we need to update the
                 * DPB and the reference info state *before* processing the
                 * next frame */
                self.submit_frame(timestamp)?;
            }
            ObuType::TileList => {
                return Err(DecodeError::DecoderError(anyhow!(
                    "large tile scale mode is not supported"
                )));
            }
            other => {
                log::debug!("skipping OBU of type {:?}", other);
            }
        }

        /* Submit the last frame if we have reached the end of the temporal unit. */
        if bitstream.len() == obu_length && self.codec.current_pic.is_some() {
            self.submit_frame(timestamp)?;
        }

        Ok(obu_length)
    }

    /// Drop all reference frames and require the stream to resume from a key
    /// frame (`Reset` state) before decoding again.
    fn flush(&mut self) -> Result<(), super::DecodeError> {
        // Note: all the submitted frames are already in the ready queue.
        self.codec.reference_frames = Default::default();
        self.decoding_state = DecodingState::Reset;

        Ok(())
    }

    fn frame_pool(&mut self, layer: PoolLayer) -> Vec<&mut B::FramePool> {
        self.backend.frame_pool(layer)
    }

    fn stream_info(&self) -> Option<&crate::decoder::StreamInfo> {
        self.backend.stream_info()
    }

    fn next_event(&mut self) -> Option<crate::decoder::DecoderEvent<B::Handle>> {
        // Record the negotiated sequence header when the client accepts a
        // format change event.
        self.query_next_event(|decoder, sequence| {
            decoder.codec.sequence = Some(Rc::clone(sequence));
        })
    }

    fn poll_fd(&self) -> BorrowedFd {
        self.epoll_fd.0.as_fd()
    }
}
486
#[cfg(test)]
pub mod tests {
    use crate::decoder::stateless::av1::Av1;
    use crate::decoder::stateless::tests::test_decode_stream;
    use crate::decoder::stateless::tests::TestStream;
    use crate::decoder::stateless::StatelessDecoder;
    use crate::decoder::BlockingMode;
    use crate::utils::simple_playback_loop;
    use crate::utils::simple_playback_loop_owned_frames;
    use crate::utils::IvfIterator;
    use crate::DecodedFormat;

    /// Run `test` using the dummy decoder, in both blocking and non-blocking modes.
    fn test_decoder_dummy(test: &TestStream, blocking_mode: BlockingMode) {
        let decoder = StatelessDecoder::<Av1, _>::new_dummy(blocking_mode).unwrap();

        test_decode_stream(
            // Drive the decoder over the IVF-wrapped stream, decoding into NV12.
            |decoder, stream, frame_source| {
                simple_playback_loop(
                    decoder,
                    IvfIterator::new(stream),
                    frame_source,
                    &mut simple_playback_loop_owned_frames,
                    DecodedFormat::NV12,
                    blocking_mode,
                )
            },
            decoder,
            test,
            false,
            false,
        );
    }

    /// Same as Chromium's test-25fps.av1.ivf
    pub const DECODE_TEST_25FPS: TestStream = TestStream {
        stream: include_bytes!("../../codec/av1/test_data/test-25fps.ivf.av1"),
        crcs: include_str!("../../codec/av1/test_data/test-25fps.ivf.av1.crc"),
    };

    /// Decode the 25fps stream with a blocking dummy decoder.
    #[test]
    fn test_25fps_block() {
        test_decoder_dummy(&DECODE_TEST_25FPS, BlockingMode::Blocking);
    }

    /// Decode the 25fps stream with a non-blocking dummy decoder.
    #[test]
    fn test_25fps_nonblock() {
        test_decoder_dummy(&DECODE_TEST_25FPS, BlockingMode::NonBlocking);
    }
}
536}