Skip to main content

mediadecode_ffmpeg/
convert.rs

1//! Conversion helpers from FFmpeg `AVFrame` / `AVPacket` to the
2//! `mediadecode` types parameterized by [`crate::Ffmpeg`] and
3//! [`crate::FfmpegBuffer`].
4//!
//! The video-frame conversion is **zero-copy** whenever a plane's
//! `linesize` equals its row width: the plane is exposed as an
//! `FfmpegBuffer` view into the underlying `AVBufferRef`, so the
//! FFmpeg-allocated pixel memory is shared between the source frame
//! and the produced `VideoFrame`. Planes that carry stride padding
//! are instead copied row by row into a fresh, tightly packed buffer.
//! Cloning the resulting `VideoFrame` bumps refcounts; dropping
//! releases them.
10use core::ptr::{addr_of, read_unaligned};
11
12use ffmpeg_next::ffi::{
13  AV_NOPTS_VALUE, AVChromaLocation, AVColorPrimaries, AVColorRange, AVColorSpace,
14  AVColorTransferCharacteristic, AVFrame, AVPictureType, AVSubtitleType, av_buffer_alloc,
15};
16use mediadecode::{
17  PixelFormat, Timebase, Timestamp,
18  channel::AudioChannelLayout,
19  color::{ChromaLocation, ColorInfo, ColorMatrix, ColorPrimaries, ColorRange, ColorTransfer},
20  frame::{AudioFrame, Dimensions, Plane, Rect, SubtitleFrame, VideoFrame},
21  subtitle::SubtitlePayload,
22};
23
24use crate::{
25  FfmpegBuffer, boundary,
26  extras::{AudioFrameExtra, PictureType, SideDataEntry, SubtitleFrameExtra, VideoFrameExtra},
27  frame::{is_supported_cpu_pix_fmt, plane_height_for, plane_row_bytes_for},
28  sample_format::SampleFormat,
29};
30
31/// Errors from [`av_frame_to_video_frame`].
32#[derive(Debug)]
33#[non_exhaustive]
34pub enum ConvertError {
35  /// `av_frame` was null.
36  NullFrame,
37  /// The frame's pixel format isn't in the closed CPU-format set this
38  /// crate supports for safe per-plane access.
39  UnsupportedPixelFormat(PixelFormat),
40  /// A plane reported `linesize <= 0` or otherwise inconsistent layout.
41  InvalidPlaneLayout {
42    /// Plane index.
43    plane: usize,
44  },
45  /// Failed to acquire an `AVBufferRef` for a plane (out of memory, or
46  /// the frame's `data[i]` pointer doesn't lie inside any of `buf[]`).
47  BufferAcquireFailed {
48    /// Plane index whose buffer couldn't be acquired.
49    plane: usize,
50  },
51}
52
53impl core::fmt::Display for ConvertError {
54  fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55    match self {
56      Self::NullFrame => write!(f, "convert: AVFrame pointer was null"),
57      Self::UnsupportedPixelFormat(pf) => {
58        write!(f, "convert: unsupported pixel format {pf:?}")
59      }
60      Self::InvalidPlaneLayout { plane } => {
61        write!(f, "convert: invalid layout on plane {plane}")
62      }
63      Self::BufferAcquireFailed { plane } => {
64        write!(f, "convert: could not acquire buffer ref for plane {plane}")
65      }
66    }
67  }
68}
69
70impl core::error::Error for ConvertError {}
71
72/// Safe wrapper around [`av_frame_to_video_frame`] taking a borrowed
73/// [`ffmpeg::Frame`](ffmpeg_next::Frame). Recommended entry point for
74/// most callers — equivalent to passing `frame.as_ptr()` to the
75/// unsafe variant, but the FFmpeg side keeps the frame alive for the
76/// duration of the call so the safety contract is satisfied
77/// internally.
78pub fn video_frame_from(
79  frame: &ffmpeg_next::Frame,
80  time_base: Timebase,
81) -> Result<VideoFrame<mediadecode::PixelFormat, VideoFrameExtra, FfmpegBuffer>, ConvertError> {
82  // SAFETY: `&frame` keeps the AVFrame alive for the duration of this
83  // call; the unsafe convert just reads through the pointer.
84  unsafe { av_frame_to_video_frame(frame.as_ptr(), time_base) }
85}
86
87/// Safe wrapper around [`av_frame_to_audio_frame`] taking a borrowed
88/// [`ffmpeg::frame::Audio`](ffmpeg_next::frame::Audio).
89pub fn audio_frame_from(
90  frame: &ffmpeg_next::frame::Audio,
91  time_base: Timebase,
92) -> Result<AudioFrame<SampleFormat, AudioChannelLayout, AudioFrameExtra, FfmpegBuffer>, ConvertError>
93{
94  // SAFETY: `&frame` keeps the AVFrame alive for the duration of this
95  // call.
96  unsafe { av_frame_to_audio_frame(frame.as_ptr(), time_base) }
97}
98
99/// Safe wrapper around [`av_subtitle_to_subtitle_frame`] taking a
100/// borrowed [`ffmpeg::Subtitle`](ffmpeg_next::Subtitle).
101pub fn subtitle_frame_from(
102  subtitle: &ffmpeg_next::Subtitle,
103  time_base: Timebase,
104) -> Result<SubtitleFrame<SubtitleFrameExtra, FfmpegBuffer>, ConvertError> {
105  // SAFETY: `&subtitle` keeps the AVSubtitle alive for the duration
106  // of this call.
107  unsafe { av_subtitle_to_subtitle_frame(subtitle.as_ptr(), time_base) }
108}
109
110/// Converts an FFmpeg `AVFrame` (CPU-side, post-`av_hwframe_transfer_data`
111/// or from a software decoder) into a `mediadecode::VideoFrame`
112/// parameterized by [`crate::Ffmpeg`] / [`crate::FfmpegBuffer`].
113///
114/// `time_base` is the source stream's time base, used to label
115/// `pts`/`duration` as mediatime [`Timestamp`]s.
116///
117/// # Safety
118///
119/// `av_frame` must be a live `*const AVFrame` for the duration of this
120/// call. The frame's `buf[]` references are not consumed; the produced
121/// `VideoFrame` holds its own refcounts on each underlying buffer.
122pub unsafe fn av_frame_to_video_frame(
123  av_frame: *const AVFrame,
124  time_base: Timebase,
125) -> Result<VideoFrame<mediadecode::PixelFormat, VideoFrameExtra, FfmpegBuffer>, ConvertError> {
126  if av_frame.is_null() {
127    return Err(ConvertError::NullFrame);
128  }
129  // We deliberately never form `&*av_frame` — `AVFrame` contains
130  // bindgen-enum fields (`pict_type`, `color_primaries`, `colorspace`,
131  // `color_trc`, `color_range`, `chroma_location`, and an embedded
132  // `AVChannelLayout` whose `order` is also enum-typed). If FFmpeg
133  // (or a hostile decoder) writes a value outside our bindgen's
134  // discriminant set, the `&AVFrame` reference itself would be
135  // immediate UB before any field access. Working through the raw
136  // pointer with field-by-field reads (and `addr_of!` for the
137  // enum-typed fields) sidesteps this whole class.
138
139  // Non-enum primitives are safe to read via `(*av_frame).field`
140  // because validity for `i32`/`i64`/pointer types is just
141  // "initialized bytes"; the surrounding struct's enum fields don't
142  // contaminate this read.
143  let format_raw = unsafe { (*av_frame).format };
144  let width_raw = unsafe { (*av_frame).width };
145  let height_raw = unsafe { (*av_frame).height };
146  let pts_raw = unsafe { (*av_frame).pts };
147  let duration_raw = unsafe { (*av_frame).duration };
148  let pix_fmt = boundary::from_av_pixel_format(format_raw);
149  let width = width_raw.max(0) as u32;
150  let height = height_raw.max(0) as u32;
151
152  // Build planes. We support the closed CPU-format set for which we
153  // know the per-plane height (NV*, P0xx/P2xx/P4xx). Unknown formats
154  // would let us read garbage `linesize * height` bytes — refuse.
155  if !is_supported_cpu_pix_fmt(pix_fmt) {
156    return Err(ConvertError::UnsupportedPixelFormat(pix_fmt));
157  }
158
159  let mut planes_out: [Plane<FfmpegBuffer>; 4] = [
160    plane_placeholder()?,
161    plane_placeholder()?,
162    plane_placeholder()?,
163    plane_placeholder()?,
164  ];
165  let mut plane_count: u8 = 0;
166
167  // The loop body indexes `planes_out`, the AVFrame's `linesize`, and
168  // its `data` array all by `plane_idx`. None of these are slices we
169  // can iterate via `iter_mut().enumerate()` — `linesize` / `data` are
170  // raw `[T; 8]` fields read through `(*av_frame).field[plane_idx]`,
171  // and `planes_out` is also indexed by the same key for symmetry —
172  // so the index-based loop is the natural shape.
173  #[allow(clippy::needless_range_loop)]
174  for plane_idx in 0..4 {
175    // Read per-plane fields through the raw pointer (no `&AVFrame`
176    // formed). `linesize` is `[c_int; 8]` and `data` is `[*mut u8; 8]`.
177    let linesize = unsafe { (*av_frame).linesize[plane_idx] };
178    if linesize <= 0 {
179      // Either we ran past the active plane count (linesize == 0) or
180      // the frame uses negative-stride vertical-flip (which our safe
181      // accessors refuse).
182      if linesize == 0 {
183        break;
184      }
185      return Err(ConvertError::InvalidPlaneLayout { plane: plane_idx });
186    }
187    let data_ptr = unsafe { (*av_frame).data[plane_idx] };
188    if data_ptr.is_null() {
189      return Err(ConvertError::InvalidPlaneLayout { plane: plane_idx });
190    }
191    let plane_h = plane_height_for(pix_fmt, plane_idx, height as usize)
192      .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
193    let row_bytes = plane_row_bytes_for(pix_fmt, plane_idx, width as usize)
194      .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
195    if row_bytes > linesize as usize {
196      return Err(ConvertError::InvalidPlaneLayout { plane: plane_idx });
197    }
198    // Safe-API stance for stride padding:
199    //
200    // Each row in the AVBufferRef is `linesize` bytes wide but only the
201    // first `row_bytes` of them are guaranteed-initialized (the
202    // codec's actual output). The remaining `linesize - row_bytes`
203    // bytes per row are FFmpeg-allocator scratch — `av_malloc`'d, not
204    // necessarily written by the decoder. Exposing those bytes as
205    // part of an `&[u8]` slice is UB even if no consumer reads them.
206    //
207    // - When `linesize == row_bytes` (no padding), zero-copy: refcount
208    //   the AVBufferRef and expose the full plane.
209    // - When `linesize > row_bytes`, we copy each row tightly into a
210    //   fresh AVBufferRef and expose that — `stride` becomes
211    //   `row_bytes` and the buffer's length is `row_bytes * plane_h`
212    //   with every byte initialized.
213    let (view, exported_stride) = if (linesize as usize) == row_bytes {
214      let plane_bytes = (plane_h)
215        .checked_mul(linesize as usize)
216        .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
217      let buf = unsafe { find_backing_buffer(av_frame, data_ptr, plane_bytes) }
218        .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
219      // Plain address subtraction (avoids `offset_from`'s
220      // strict-provenance requirement; the pointers are independent
221      // C-side casts).
222      let offset = unsafe { (data_ptr as usize).wrapping_sub((*buf).data as usize) };
223      // SAFETY: `buf` is non-null and live; offset + plane_bytes <= buf.size
224      // by find_backing_buffer's check.
225      let view = unsafe { FfmpegBuffer::from_ref_view(buf, offset, plane_bytes) }
226        .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
227      (view, linesize as u32)
228    } else {
229      let total_bytes = row_bytes
230        .checked_mul(plane_h)
231        .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
232      // Bound-check the readable extent in the source AVBufferRef
233      // BEFORE we start dereferencing per-row offsets. The zero-copy
234      // branch above did this implicitly by passing `plane_bytes` to
235      // `find_backing_buffer`; the copy branch must do the same — a
236      // buggy or hostile decoder/filter could hand us a `data_ptr`
237      // backed by a buffer too small for `(plane_h - 1) * linesize +
238      // row_bytes`, in which case `from_raw_parts` on the last few
239      // rows would form a slice over invalid memory (immediate UB,
240      // before any read).
241      let last_row_offset = (plane_h.saturating_sub(1))
242        .checked_mul(linesize as usize)
243        .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
244      let readable_extent = last_row_offset
245        .checked_add(row_bytes)
246        .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
247      // `find_backing_buffer` confirms the AVBufferRef in `(*av_frame).buf[]`
248      // that contains `data_ptr` covers at least `readable_extent`
249      // bytes from the data pointer. We don't need the returned ptr;
250      // we just need the existence guarantee.
251      unsafe { find_backing_buffer(av_frame, data_ptr, readable_extent) }
252        .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
253      let mut packed: std::vec::Vec<u8> = std::vec::Vec::new();
254      packed
255        .try_reserve_exact(total_bytes)
256        .map_err(|_| ConvertError::BufferAcquireFailed { plane: plane_idx })?;
257      for row_idx in 0..plane_h {
258        let row_offset = (row_idx)
259          .checked_mul(linesize as usize)
260          .ok_or(ConvertError::InvalidPlaneLayout { plane: plane_idx })?;
261        // SAFETY: bounds-checked above via `find_backing_buffer`;
262        // `row_offset + row_bytes <= readable_extent <= buf.size`.
263        // Each per-row slice is the part the decoder writes
264        // (initialized).
265        let row_slice =
266          unsafe { core::slice::from_raw_parts(data_ptr.add(row_offset) as *const u8, row_bytes) };
267        packed.extend_from_slice(row_slice);
268      }
269      let buf = FfmpegBuffer::copy_from_slice(&packed)
270        .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
271      (buf, row_bytes as u32)
272    };
273
274    planes_out[plane_idx] = Plane::new(view, exported_stride);
275    plane_count = (plane_idx + 1) as u8;
276  }
277
278  // pts / duration / time_base
279  let pts = if pts_raw != AV_NOPTS_VALUE {
280    Some(Timestamp::new(pts_raw, time_base))
281  } else {
282    None
283  };
284  let duration = if duration_raw > 0 {
285    Some(Timestamp::new(duration_raw, time_base))
286  } else {
287    None
288  };
289
290  // Visible rect (FFmpeg crop).
291  let visible_rect = unsafe { build_visible_rect(av_frame, width, height) };
292
293  // Color metadata (the universal cross-backend bits). We read each
294  // bindgen enum-typed field through a raw `i32` window — even
295  // referencing an out-of-range enum value is UB before any cast can
296  // run, so we never let Rust assume the field actually inhabits the
297  // enum's discriminant set. FFmpeg version skew or a buggy decoder
298  // can put unknown values into these fields.
299
300  // SAFETY: `av_frame` points at a live AVFrame; `addr_of!` computes
301  // the address without forming a reference, and `read_unaligned::<i32>`
302  // is sound because each of these enum types has the layout of
303  // `c_int` (i32) per FFmpeg's bindgen output.
304  let color_primaries_raw =
305    unsafe { read_unaligned(addr_of!((*av_frame).color_primaries) as *const i32) };
306  let color_trc_raw = unsafe { read_unaligned(addr_of!((*av_frame).color_trc) as *const i32) };
307  let colorspace_raw = unsafe { read_unaligned(addr_of!((*av_frame).colorspace) as *const i32) };
308  let color_range_raw = unsafe { read_unaligned(addr_of!((*av_frame).color_range) as *const i32) };
309  let chroma_location_raw =
310    unsafe { read_unaligned(addr_of!((*av_frame).chroma_location) as *const i32) };
311  let color = ColorInfo::UNSPECIFIED
312    .with_primaries(map_primaries(color_primaries_raw))
313    .with_transfer(map_transfer(color_trc_raw))
314    .with_matrix(map_matrix(colorspace_raw))
315    .with_range(map_range(color_range_raw))
316    .with_chroma_location(map_chroma_loc(chroma_location_raw));
317
318  // Backend-specific extras.
319  let extra = unsafe { build_video_frame_extra(av_frame) };
320
321  // pix_fmt is already mediadecode::PixelFormat thanks to the boundary
322  // function above, so we just pass it through.
323  let mut out = VideoFrame::new(
324    Dimensions::new(width, height),
325    pix_fmt,
326    planes_out,
327    plane_count,
328    extra,
329  )
330  .with_pts(pts)
331  .with_duration(duration)
332  .with_color(color);
333  if let Some(r) = visible_rect {
334    out = out.with_visible_rect(Some(r));
335  }
336  Ok(out)
337}
338
339fn plane_placeholder() -> Result<Plane<FfmpegBuffer>, ConvertError> {
340  // Allocate a zero-byte AVBufferRef as a placeholder for unused plane
341  // slots. `[Plane<B>; 4]` requires four populated entries; we only
342  // expose `plane_count` of them through `VideoFrame::planes()`.
343  let raw = unsafe { av_buffer_alloc(0) };
344  // `av_buffer_alloc(0)` is allowed to return null on some platforms;
345  // fall back to allocating 1 byte if so.
346  let raw = if raw.is_null() {
347    unsafe { av_buffer_alloc(1) }
348  } else {
349    raw
350  };
351  if raw.is_null() {
352    // Truly OOM. Return an error by way of a poisoned plane.
353    return Err(ConvertError::BufferAcquireFailed { plane: 4 });
354  }
355  let buf =
356    unsafe { FfmpegBuffer::take(raw) }.ok_or(ConvertError::BufferAcquireFailed { plane: 4 })?;
357  Ok(Plane::new(buf, 0))
358}
359
360/// # Safety
361/// `av_frame` must be a live `*const AVFrame` for the duration of this
362/// call. The function reads only `crop_*` fields through the raw
363/// pointer — it never forms `&AVFrame`, so unrelated invalid enum
364/// fields elsewhere in the struct don't matter.
365unsafe fn build_visible_rect(av_frame: *const AVFrame, width: u32, height: u32) -> Option<Rect> {
366  let crop_left = unsafe { (*av_frame).crop_left } as u32;
367  let crop_top = unsafe { (*av_frame).crop_top } as u32;
368  let crop_right = unsafe { (*av_frame).crop_right } as u32;
369  let crop_bottom = unsafe { (*av_frame).crop_bottom } as u32;
370  if crop_left == 0 && crop_top == 0 && crop_right == 0 && crop_bottom == 0 {
371    return None;
372  }
373  let x = crop_left;
374  let y = crop_top;
375  let w = width.saturating_sub(crop_left).saturating_sub(crop_right);
376  let h = height.saturating_sub(crop_top).saturating_sub(crop_bottom);
377  Some(Rect::new(x, y, w, h))
378}
379
380/// # Safety
381/// `av_frame` must be a live `*const AVFrame` for the duration of this
382/// call. Reads each individual field through the raw pointer; never
383/// forms a `&AVFrame` reference.
384unsafe fn build_video_frame_extra(av_frame: *const AVFrame) -> VideoFrameExtra {
385  let mut out = VideoFrameExtra::default();
386  // SAR.
387  let sar_num = unsafe { (*av_frame).sample_aspect_ratio.num };
388  let sar_den = unsafe { (*av_frame).sample_aspect_ratio.den };
389  if sar_num > 0 && sar_den > 0 && (sar_num != 1 || sar_den != 1) {
390    out.set_sample_aspect_ratio(Some((sar_num as u32, sar_den as u32)));
391  }
392  // Picture type — read raw to avoid bindgen-enum UB if FFmpeg writes
393  // an out-of-range value (version skew / hostile decoder).
394
395  // SAFETY: `av_frame` is live; reading `pict_type` as `i32` matches
396  // the bindgen enum's underlying `c_int` storage.
397  let pict_type_raw = unsafe { read_unaligned(addr_of!((*av_frame).pict_type) as *const i32) };
398  out.set_picture_type(map_picture_type_raw(pict_type_raw));
399  // Key frame and interlace flags. AVFrame.flags has dedicated bits
400  // for these in recent FFmpeg; the deprecated fields (key_frame,
401  // interlaced_frame, top_field_first) still mirror them.
402  let flags = unsafe { (*av_frame).flags };
403  out.set_key_frame(flags & ffmpeg_next::ffi::AV_FRAME_FLAG_KEY != 0);
404  out.set_interlaced(flags & ffmpeg_next::ffi::AV_FRAME_FLAG_INTERLACED != 0);
405  out.set_top_field_first(flags & ffmpeg_next::ffi::AV_FRAME_FLAG_TOP_FIELD_FIRST != 0);
406  // Best-effort timestamp.
407  let bet = unsafe { (*av_frame).best_effort_timestamp };
408  if bet != AV_NOPTS_VALUE {
409    out.set_best_effort_timestamp(Some(bet));
410  }
411  // Side data — passthrough as raw bytes.
412  out.set_side_data(unsafe { collect_side_data(av_frame) });
413  out
414}
415
/// Upper bound on how many `AVFrameSideData` entries are copied out
/// of one AVFrame. Real streams carry only a handful (mastering
/// display, content light level, dynamic HDR metadata, S12M
/// timecodes, A53 captions, …), typically fewer than 8. The limit
/// keeps a crafted stream from pushing the safe converter into a
/// long per-frame allocation loop.
const SIDE_DATA_MAX_ENTRIES: usize = 64;
/// Upper bound on the combined side-data payload copied per AVFrame.
/// HDR / dynamic-metadata payloads are usually a few hundred bytes,
/// A53 captions a few kilobytes, and SEI dumps in pathological
/// streams have reached tens of kilobytes. 256 KiB leaves generous
/// headroom above all of those while keeping metadata-driven memory
/// use firmly bounded.
const SIDE_DATA_MAX_TOTAL_BYTES: usize = 256 * 1024;
430
/// Upper bound on how many `AVSubtitleRect` entries are copied from
/// one AVSubtitle. Real cues carry roughly 1–4 rects, so 64 leaves
/// ample headroom.
const SUBTITLE_MAX_RECTS: usize = 64;
/// Upper bound on the text/ASS payload of a single rect. No realistic
/// ASS line comes close; the limit stops a malicious decoder from
/// attaching a multi-megabyte "subtitle" string.
const SUBTITLE_MAX_TEXT_BYTES_PER_RECT: usize = 64 * 1024;
/// Upper bound on the text/ASS payload summed over every rect of one
/// AVSubtitle, newline separators included.
const SUBTITLE_MAX_TEXT_TOTAL_BYTES: usize = 256 * 1024;
/// Upper bound on a single rect's bitmap (`linesize * height`).
/// DVB / PGS overlays at full HD realistically reach about 256 KiB,
/// so 16 MiB is far beyond anything legitimate.
const SUBTITLE_MAX_BITMAP_BYTES_PER_RECT: usize = 16 * 1024 * 1024;
/// Upper bound on bitmap bytes summed over every rect of one
/// AVSubtitle.
const SUBTITLE_MAX_BITMAP_TOTAL_BYTES: usize = 32 * 1024 * 1024;
448
/// Length-limited replacement for `CStr::from_ptr(p).to_bytes()`.
///
/// Scans at most `cap + 1` bytes starting at `ptr` for a NUL. On
/// success the bytes in front of the NUL are returned as a slice; if
/// the window contains no NUL (string longer than `cap`, or not
/// terminated at all) the result is `None`.
///
/// `CStr::from_ptr` keeps walking until it meets a NUL, so a huge but
/// valid string means an unbounded scan and a missing terminator is
/// an outright precondition violation. Limiting the scan to
/// `cap + 1` bytes caps both.
///
/// # Safety
/// `ptr` must be non-null and readable for at least
/// `min(cap + 1, length-until-NUL)` bytes. FFmpeg's subtitle text
/// pointers (`(*rect).text` / `.ass`) meet this whenever they are
/// non-null, per FFmpeg's contract — which, however, places no bound
/// on the length.
unsafe fn bounded_cstr_bytes<'a>(ptr: *const core::ffi::c_char, cap: usize) -> Option<&'a [u8]> {
  let base = ptr as *const u8;
  // One extra byte so that a string of exactly `cap` bytes, with its
  // NUL sitting at index `cap`, is still accepted.
  let window = cap.saturating_add(1);

  // SAFETY: the caller guarantees readability up to the NUL or the
  // end of the window, and `find` stops at the first NUL it sees.
  let nul_at = (0..window).find(|&idx| unsafe { *base.add(idx) } == 0)?;

  // SAFETY: the `nul_at` bytes in front of the terminator were all
  // just read successfully; the NUL itself is excluded.
  Some(unsafe { core::slice::from_raw_parts(base, nul_at) })
}
485
486/// # Safety
487/// `av_frame` must be a live `*const AVFrame`. The function reads
488/// `nb_side_data` and `side_data[]` through the raw pointer; each
489/// `AVFrameSideData.type_` is read raw (it's a bindgen enum), and
490/// each `data` payload is bounds-checked before slicing.
491///
492/// Memory-safety stance: this function is called on every decoded
493/// frame, on data the decoder controls. Side-data is bounded by
494/// [`SIDE_DATA_MAX_ENTRIES`] entries and [`SIDE_DATA_MAX_TOTAL_BYTES`]
495/// total bytes; once either cap is reached we stop copying further
496/// entries and a `tracing::warn!` is emitted at most once per call.
497/// Allocations use `try_reserve_exact` so OOM surfaces as a dropped
498/// entry rather than a process abort.
499unsafe fn collect_side_data(av_frame: *const AVFrame) -> std::vec::Vec<SideDataEntry> {
500  // Read `nb_side_data` as the bindgen `c_int` and clamp non-
501  // positive values BEFORE casting to `usize`. A negative value
502  // (corrupt / version-skew decoder output) cast directly to
503  // `usize` becomes a huge positive count and would walk OOB
504  // memory below; treat it as "no side data".
505  let nb_side_data_raw = unsafe { (*av_frame).nb_side_data };
506  let side_data = unsafe { (*av_frame).side_data };
507  if nb_side_data_raw <= 0 || side_data.is_null() {
508    return Vec::new();
509  }
510  let count_raw = nb_side_data_raw as usize;
511  let count = count_raw.min(SIDE_DATA_MAX_ENTRIES);
512  if count_raw > SIDE_DATA_MAX_ENTRIES {
513    tracing::warn!(
514      cap = SIDE_DATA_MAX_ENTRIES,
515      requested = count_raw,
516      "mediadecode-ffmpeg: AVFrame.nb_side_data exceeds entry cap; truncating",
517    );
518  }
519  let mut out: Vec<SideDataEntry> = Vec::new();
520  if out.try_reserve_exact(count).is_err() {
521    return Vec::new();
522  }
523  let mut total_bytes: usize = 0;
524  for i in 0..count {
525    let sd = unsafe { *side_data.add(i) };
526    if sd.is_null() {
527      continue;
528    }
529    // `AVFrameSideData.type_` is `AVFrameSideDataType` — bindgen
530    // enum. Read raw to avoid forming an invalid value if FFmpeg
531    // writes an unknown discriminant (version skew).
532    let kind = unsafe { read_unaligned(addr_of!((*sd).type_) as *const i32) };
533    let size = unsafe { (*sd).size };
534    let data_ptr = unsafe { (*sd).data };
535    let data_slice = if size == 0 || data_ptr.is_null() {
536      Vec::new()
537    } else {
538      // Byte-budget check: stop copying further side-data entries
539      // once we've reached the per-frame cap. Earlier entries
540      // already in `out` stay; later entries are dropped.
541      let projected = total_bytes.saturating_add(size);
542      if projected > SIDE_DATA_MAX_TOTAL_BYTES {
543        tracing::warn!(
544          cap = SIDE_DATA_MAX_TOTAL_BYTES,
545          projected,
546          "mediadecode-ffmpeg: AVFrame side-data byte cap reached; dropping remaining entries",
547        );
548        break;
549      }
550      total_bytes = projected;
551      // Fallible copy. `try_reserve_exact` lets OOM surface as a
552      // dropped entry rather than a process abort.
553      let mut buf: Vec<u8> = Vec::new();
554      if buf.try_reserve_exact(size).is_err() {
555        continue;
556      }
557      // SAFETY: `data_ptr` is documented as valid for `size` bytes
558      // per FFmpeg's AVFrameSideData contract.
559      let src = unsafe { core::slice::from_raw_parts(data_ptr, size) };
560      buf.extend_from_slice(src);
561      buf
562    };
563    out.push(SideDataEntry::new(kind, data_slice));
564  }
565  out
566}
567
568/// Locate the `AVBufferRef` in `(*av_frame).buf[]` that backs
569/// `data_ptr`, confirming the requested `bytes` fit inside the buffer.
570/// Returns `None` on no match, null/empty `buf` entries, or any
571/// arithmetic that would overflow `usize`.
572///
573/// # Safety
574/// `av_frame` must be a live `*const AVFrame`. Reads `buf[]` (an
575/// array of pointers — no bindgen-enum validity hazards).
576unsafe fn find_backing_buffer(
577  av_frame: *const AVFrame,
578  data_ptr: *const u8,
579  bytes: usize,
580) -> Option<*mut ffmpeg_next::ffi::AVBufferRef> {
581  let buf_array_len = unsafe { (*av_frame).buf.len() };
582  for i in 0..buf_array_len {
583    let buf = unsafe { (*av_frame).buf[i] };
584    if buf.is_null() {
585      continue;
586    }
587    let buf_data = unsafe { (*buf).data as *const u8 };
588    let buf_size = unsafe { (*buf).size };
589    if buf_data.is_null() {
590      continue;
591    }
592    let start = buf_data as usize;
593    let Some(end) = start.checked_add(buf_size) else {
594      continue;
595    };
596    let dp = data_ptr as usize;
597    let Some(dp_end) = dp.checked_add(bytes) else {
598      continue;
599    };
600    if dp >= start && dp_end <= end {
601      return Some(buf);
602    }
603  }
604  None
605}
606
607fn map_primaries(raw: i32) -> ColorPrimaries {
608  match raw {
609    x if x == AVColorPrimaries::AVCOL_PRI_BT709 as i32 => ColorPrimaries::Bt709,
610    x if x == AVColorPrimaries::AVCOL_PRI_UNSPECIFIED as i32 => ColorPrimaries::Unspecified,
611    x if x == AVColorPrimaries::AVCOL_PRI_BT470M as i32 => ColorPrimaries::Bt470M,
612    x if x == AVColorPrimaries::AVCOL_PRI_BT470BG as i32 => ColorPrimaries::Bt470Bg,
613    x if x == AVColorPrimaries::AVCOL_PRI_SMPTE170M as i32 => ColorPrimaries::Smpte170M,
614    x if x == AVColorPrimaries::AVCOL_PRI_SMPTE240M as i32 => ColorPrimaries::Smpte240M,
615    x if x == AVColorPrimaries::AVCOL_PRI_FILM as i32 => ColorPrimaries::Film,
616    x if x == AVColorPrimaries::AVCOL_PRI_BT2020 as i32 => ColorPrimaries::Bt2020,
617    x if x == AVColorPrimaries::AVCOL_PRI_SMPTE428 as i32 => ColorPrimaries::SmpteSt428,
618    x if x == AVColorPrimaries::AVCOL_PRI_SMPTE431 as i32 => ColorPrimaries::SmpteRp431,
619    x if x == AVColorPrimaries::AVCOL_PRI_SMPTE432 as i32 => ColorPrimaries::SmpteEg432,
620    x if x == AVColorPrimaries::AVCOL_PRI_EBU3213 as i32 => ColorPrimaries::Ebu3213E,
621    _ => ColorPrimaries::Unspecified,
622  }
623}
624
625fn map_transfer(raw: i32) -> ColorTransfer {
626  match raw {
627    x if x == AVColorTransferCharacteristic::AVCOL_TRC_BT709 as i32 => ColorTransfer::Bt709,
628    x if x == AVColorTransferCharacteristic::AVCOL_TRC_UNSPECIFIED as i32 => {
629      ColorTransfer::Unspecified
630    }
631    x if x == AVColorTransferCharacteristic::AVCOL_TRC_GAMMA22 as i32 => ColorTransfer::Bt470M,
632    x if x == AVColorTransferCharacteristic::AVCOL_TRC_GAMMA28 as i32 => ColorTransfer::Bt470Bg,
633    x if x == AVColorTransferCharacteristic::AVCOL_TRC_SMPTE170M as i32 => ColorTransfer::Smpte170M,
634    x if x == AVColorTransferCharacteristic::AVCOL_TRC_SMPTE240M as i32 => ColorTransfer::Smpte240M,
635    x if x == AVColorTransferCharacteristic::AVCOL_TRC_LINEAR as i32 => ColorTransfer::Linear,
636    x if x == AVColorTransferCharacteristic::AVCOL_TRC_LOG as i32 => ColorTransfer::Log100,
637    x if x == AVColorTransferCharacteristic::AVCOL_TRC_LOG_SQRT as i32 => ColorTransfer::Log316,
638    x if x == AVColorTransferCharacteristic::AVCOL_TRC_IEC61966_2_4 as i32 => {
639      ColorTransfer::Iec6196624
640    }
641    x if x == AVColorTransferCharacteristic::AVCOL_TRC_BT1361_ECG as i32 => {
642      ColorTransfer::Bt1361Ecg
643    }
644    x if x == AVColorTransferCharacteristic::AVCOL_TRC_IEC61966_2_1 as i32 => {
645      ColorTransfer::Iec6196621
646    }
647    x if x == AVColorTransferCharacteristic::AVCOL_TRC_BT2020_10 as i32 => {
648      ColorTransfer::Bt2020_10Bit
649    }
650    x if x == AVColorTransferCharacteristic::AVCOL_TRC_BT2020_12 as i32 => {
651      ColorTransfer::Bt2020_12Bit
652    }
653    x if x == AVColorTransferCharacteristic::AVCOL_TRC_SMPTE2084 as i32 => {
654      ColorTransfer::SmpteSt2084Pq
655    }
656    x if x == AVColorTransferCharacteristic::AVCOL_TRC_SMPTE428 as i32 => ColorTransfer::SmpteSt428,
657    x if x == AVColorTransferCharacteristic::AVCOL_TRC_ARIB_STD_B67 as i32 => {
658      ColorTransfer::AribStdB67Hlg
659    }
660    _ => ColorTransfer::Unspecified,
661  }
662}
663
664fn map_matrix(raw: i32) -> ColorMatrix {
665  match raw {
666    x if x == AVColorSpace::AVCOL_SPC_BT709 as i32 => ColorMatrix::Bt709,
667    x if x == AVColorSpace::AVCOL_SPC_BT2020_NCL as i32 => ColorMatrix::Bt2020Ncl,
668    x if x == AVColorSpace::AVCOL_SPC_SMPTE170M as i32 => ColorMatrix::Bt601,
669    x if x == AVColorSpace::AVCOL_SPC_BT470BG as i32 => ColorMatrix::Bt601,
670    x if x == AVColorSpace::AVCOL_SPC_SMPTE240M as i32 => ColorMatrix::Smpte240m,
671    x if x == AVColorSpace::AVCOL_SPC_FCC as i32 => ColorMatrix::Fcc,
672    x if x == AVColorSpace::AVCOL_SPC_YCGCO as i32 => ColorMatrix::YCgCo,
673    _ => ColorMatrix::Bt709, // ColorMatrix has no Unspecified; Bt709 is FFmpeg's height>=720 default
674  }
675}
676
677fn map_range(raw: i32) -> ColorRange {
678  match raw {
679    x if x == AVColorRange::AVCOL_RANGE_JPEG as i32 => ColorRange::Full,
680    x if x == AVColorRange::AVCOL_RANGE_MPEG as i32 => ColorRange::Limited,
681    _ => ColorRange::Unspecified,
682  }
683}
684
685fn map_chroma_loc(raw: i32) -> ChromaLocation {
686  match raw {
687    x if x == AVChromaLocation::AVCHROMA_LOC_LEFT as i32 => ChromaLocation::Left,
688    x if x == AVChromaLocation::AVCHROMA_LOC_CENTER as i32 => ChromaLocation::Center,
689    x if x == AVChromaLocation::AVCHROMA_LOC_TOPLEFT as i32 => ChromaLocation::TopLeft,
690    x if x == AVChromaLocation::AVCHROMA_LOC_TOP as i32 => ChromaLocation::Top,
691    x if x == AVChromaLocation::AVCHROMA_LOC_BOTTOMLEFT as i32 => ChromaLocation::BottomLeft,
692    x if x == AVChromaLocation::AVCHROMA_LOC_BOTTOM as i32 => ChromaLocation::Bottom,
693    _ => ChromaLocation::Unspecified,
694  }
695}
696
697/// Converts an FFmpeg audio `AVFrame` into a `mediadecode::AudioFrame`.
698///
699/// The plane payloads are zero-copy views into the source frame's
700/// `AVBufferRef` entries (the corresponding `data[i]` is always
701/// covered by exactly one of `buf[i]` per FFmpeg's contract). Channel
702/// counts above 8 (which would spill into `extended_buf`) are clamped
703/// to 8 — the rare cases where this matters can read the source
704/// `AVFrame` directly.
705///
706/// # Safety
707///
708/// `av_frame` must be a live `*const AVFrame` for the duration of this
709/// call and must describe an audio frame (`format` is an
710/// `AVSampleFormat`, `nb_samples > 0`, and `data[]` / `buf[]` populated).
711pub unsafe fn av_frame_to_audio_frame(
712  av_frame: *const AVFrame,
713  time_base: Timebase,
714) -> Result<AudioFrame<SampleFormat, AudioChannelLayout, AudioFrameExtra, FfmpegBuffer>, ConvertError>
715{
716  if av_frame.is_null() {
717    return Err(ConvertError::NullFrame);
718  }
719  // Same stance as `av_frame_to_video_frame`: never form `&AVFrame`.
720  // Read every field through the raw pointer; for `ch_layout` (which
721  // contains an `order: AVChannelOrder` enum) we hand the raw pointer
722  // straight into `channel_layout::audio_channel_layout_from_raw_ptr`,
723  // which validates `order` as `i32` before constructing any
724  // `AVChannelOrder` value.
725  let format_raw = unsafe { (*av_frame).format };
726  let sample_rate_raw = unsafe { (*av_frame).sample_rate };
727  let nb_samples_raw = unsafe { (*av_frame).nb_samples };
728  let pts_raw = unsafe { (*av_frame).pts };
729  let duration_raw = unsafe { (*av_frame).duration };
730  let bet_raw = unsafe { (*av_frame).best_effort_timestamp };
731
732  let sample_format = SampleFormat::from_raw(format_raw);
733  let sample_rate = sample_rate_raw.max(0) as u32;
734  let nb_samples = nb_samples_raw.max(0) as u32;
735
736  // SAFETY: `av_frame` is a live `*const AVFrame`; passing the
737  // address of the embedded ch_layout as `*const AVChannelLayout`
738  // is sound because `addr_of!` doesn't form a reference.
739  let ch_layout_ptr = unsafe { addr_of!((*av_frame).ch_layout) };
740  let channel_layout =
741    unsafe { crate::channel_layout::audio_channel_layout_from_raw_ptr(ch_layout_ptr) };
742  let channel_count_full = channel_layout.channels();
743  let channel_count = channel_count_full.min(255) as u8;
744
745  // Plane count: 1 for packed, channel_count for planar.
746  let is_planar = sample_format.is_planar();
747  let plane_count_full = if is_planar { channel_count as usize } else { 1 };
748  // mediadecode's `AudioFrame` carries up to 8 plane slots
749  // (matching `AV_NUM_DATA_POINTERS`). Planar audio with more than
750  // 8 channels uses `AVFrame.extended_data[]` / `extended_buf[]`,
751  // which we don't yet plumb through. Refuse the frame rather than
752  // silently truncating to the first 8 channels and returning an
753  // `AudioFrame` whose advertised `channel_count` exceeds its
754  // populated plane count.
755  if plane_count_full > 8 {
756    return Err(ConvertError::InvalidPlaneLayout { plane: 8 });
757  }
758  let plane_count = plane_count_full as u8;
759
760  // Per-plane size in bytes. For audio, FFmpeg only sets `linesize[0]`;
761  // every planar plane has the same size, every packed buffer is the
762  // total size for all channels. Validate against the format's
763  // expected minimum so a hostile/buggy decoder can't smuggle a
764  // shrunk linesize past us (which would let consumers read past
765  // valid bytes when they trust `nb_samples`).
766  let linesize0 = unsafe { (*av_frame).linesize[0] };
767  if nb_samples > 0 && linesize0 <= 0 {
768    return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
769  }
770  let plane_bytes = linesize0.max(0) as usize;
771  if nb_samples > 0 {
772    let bytes_per_sample = sample_format
773      .bytes_per_sample()
774      .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })? as usize;
775    let expected_per_plane = if is_planar {
776      // Planar: each plane carries `nb_samples * bytes_per_sample`.
777      (nb_samples as usize)
778        .checked_mul(bytes_per_sample)
779        .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })?
780    } else {
781      // Packed: the single plane interleaves all channels.
782      (nb_samples as usize)
783        .checked_mul(bytes_per_sample)
784        .and_then(|x| x.checked_mul(channel_count.max(1) as usize))
785        .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })?
786    };
787    if plane_bytes < expected_per_plane {
788      return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
789    }
790  }
791
792  let mut planes_out: [Plane<FfmpegBuffer>; 8] = [
793    audio_plane_placeholder()?,
794    audio_plane_placeholder()?,
795    audio_plane_placeholder()?,
796    audio_plane_placeholder()?,
797    audio_plane_placeholder()?,
798    audio_plane_placeholder()?,
799    audio_plane_placeholder()?,
800    audio_plane_placeholder()?,
801  ];
802
803  // Same rationale as in the video path — index-by-key over three
804  // unrelated raw arrays (`planes_out`, `(*av_frame).data`, and the
805  // implicit per-plane bookkeeping); no slice iteration applies.
806  #[allow(clippy::needless_range_loop)]
807  for plane_idx in 0..plane_count as usize {
808    let data_ptr = unsafe { (*av_frame).data[plane_idx] };
809    if data_ptr.is_null() {
810      // A null plane in a planar layout (or the sole plane in a
811      // packed layout) means the decoder produced an incomplete
812      // frame — surface as an error rather than returning a frame
813      // whose `planes()` exposes empty placeholder channels for
814      // the missing data.
815      return Err(ConvertError::InvalidPlaneLayout { plane: plane_idx });
816    }
817    let buf = unsafe { find_audio_backing_buffer(av_frame, data_ptr, plane_bytes) }
818      .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
819    // See `av_frame_to_video_frame` for the rationale on plain
820    // address subtraction over `offset_from`.
821    let offset = unsafe { (data_ptr as usize).wrapping_sub((*buf).data as usize) };
822    // SAFETY: `buf` is non-null and live; offset + plane_bytes <= buf.size
823    // by find_audio_backing_buffer's bounds check.
824    let view = unsafe { FfmpegBuffer::from_ref_view(buf, offset, plane_bytes) }
825      .ok_or(ConvertError::BufferAcquireFailed { plane: plane_idx })?;
826    planes_out[plane_idx] = Plane::new(view, plane_bytes as u32);
827  }
828
829  let pts = if pts_raw != AV_NOPTS_VALUE {
830    Some(Timestamp::new(pts_raw, time_base))
831  } else {
832    None
833  };
834  let duration = if duration_raw > 0 {
835    Some(Timestamp::new(duration_raw, time_base))
836  } else {
837    None
838  };
839
840  let mut extra = AudioFrameExtra::default();
841  if bet_raw != AV_NOPTS_VALUE {
842    extra.set_best_effort_timestamp(Some(bet_raw));
843  }
844  // SAFETY: caller upholds liveness for the duration of the call;
845  // collect_side_data reads enum-typed `type_` raw and bounds-checks
846  // each entry's data slice.
847  extra.set_side_data(unsafe { collect_side_data(av_frame) });
848
849  Ok(
850    AudioFrame::new(
851      sample_rate,
852      nb_samples,
853      channel_count,
854      sample_format,
855      channel_layout,
856      planes_out,
857      plane_count,
858      extra,
859    )
860    .with_pts(pts)
861    .with_duration(duration),
862  )
863}
864
865fn audio_plane_placeholder() -> Result<Plane<FfmpegBuffer>, ConvertError> {
866  let raw = unsafe { av_buffer_alloc(1) };
867  if raw.is_null() {
868    return Err(ConvertError::BufferAcquireFailed { plane: 8 });
869  }
870  let buf =
871    unsafe { FfmpegBuffer::take(raw) }.ok_or(ConvertError::BufferAcquireFailed { plane: 8 })?;
872  Ok(Plane::new(buf, 0))
873}
874
875/// # Safety
876/// `av_frame` must be a live `*const AVFrame`.
877unsafe fn find_audio_backing_buffer(
878  av_frame: *const AVFrame,
879  data_ptr: *const u8,
880  bytes: usize,
881) -> Option<*mut ffmpeg_next::ffi::AVBufferRef> {
882  // Audio frames pack each plane into a separate AVBufferRef in buf[].
883  // Same scan as the video path — finds whichever buffer's data range
884  // contains data_ptr. Overflow-safe arithmetic per
885  // `find_backing_buffer`'s rationale.
886  let buf_array_len = unsafe { (*av_frame).buf.len() };
887  for i in 0..buf_array_len {
888    let buf = unsafe { (*av_frame).buf[i] };
889    if buf.is_null() {
890      continue;
891    }
892    let buf_data = unsafe { (*buf).data as *const u8 };
893    let buf_size = unsafe { (*buf).size };
894    if buf_data.is_null() {
895      continue;
896    }
897    let start = buf_data as usize;
898    let Some(end) = start.checked_add(buf_size) else {
899      continue;
900    };
901    let dp = data_ptr as usize;
902    let Some(dp_end) = dp.checked_add(bytes) else {
903      continue;
904    };
905    if dp >= start && dp_end <= end {
906      return Some(buf);
907    }
908  }
909  None
910}
911
912/// Converts an FFmpeg `AVSubtitle` into a `mediadecode::SubtitleFrame`.
913///
914/// Strategy:
915/// - If the subtitle contains any text/ASS rects, produce a
916///   [`SubtitlePayload::Text`] whose buffer is the concatenation of
917///   their UTF-8 contents (newline-separated).
918/// - Otherwise, if the subtitle contains bitmap rects, produce a
919///   [`SubtitlePayload::Bitmap`] with one [`mediadecode::subtitle::BitmapRegion`]
920///   per rect (paletted indices and RGBA palette copied into fresh
921///   refcounted FfmpegBuffers, since `AVSubtitleRect` data is not
922///   refcounted).
923/// - An empty subtitle (no rects) becomes an empty `Text` payload.
924///
925/// `time_base` is the source stream's time base, used to label
926/// `pts` / `duration`. The duration is computed as
927/// `(end_display_time - start_display_time)` in milliseconds, then
928/// rescaled into `time_base`.
929///
930/// # Safety
931///
932/// `av_subtitle` must be a live `*const AVSubtitle` for the duration
933/// of this call; the rect array (`av_subtitle.rects`) must be valid
934/// for `av_subtitle.num_rects` entries.
935pub unsafe fn av_subtitle_to_subtitle_frame(
936  av_subtitle: *const ffmpeg_next::ffi::AVSubtitle,
937  time_base: Timebase,
938) -> Result<SubtitleFrame<SubtitleFrameExtra, FfmpegBuffer>, ConvertError> {
939  if av_subtitle.is_null() {
940    return Err(ConvertError::NullFrame);
941  }
942  // Same stance as `av_frame_to_video_frame`: never form `&AVSubtitle`
943  // or `&AVSubtitleRect` (both contain `type_: AVSubtitleType` enum
944  // fields). Read every field through the raw pointer.
945
946  let mut text_chunks: std::vec::Vec<u8> = std::vec::Vec::new();
947  let mut bitmap_regions: std::vec::Vec<mediadecode::subtitle::BitmapRegion<FfmpegBuffer>> =
948    std::vec::Vec::new();
949
950  let count_raw = unsafe { (*av_subtitle).num_rects } as usize;
951  let rects_ptr = unsafe { (*av_subtitle).rects };
952  // Defensive: `num_rects > 0` with `rects == null` would be a malformed
953  // AVSubtitle, but a hostile decoder could produce one — bail rather
954  // than dereferencing.
955  if count_raw > 0 && rects_ptr.is_null() {
956    return Err(ConvertError::NullFrame);
957  }
958  // Cap rect count, total text bytes, and total bitmap bytes
959  // against decoder-controlled metadata. Realistic subtitles carry
960  // a handful of rects (typically 1–4 per displayed cue), text
961  // payloads in the low kilobytes (ASS lines), and bitmap
962  // payloads in the low hundreds of KiB (DVB / PGS). These caps
963  // are two orders of magnitude over realistic ceilings; their
964  // job is to bound a malicious / corrupt stream's allocation
965  // budget, not to limit legitimate use.
966  let count = count_raw.min(SUBTITLE_MAX_RECTS);
967  if count_raw > SUBTITLE_MAX_RECTS {
968    tracing::warn!(
969      cap = SUBTITLE_MAX_RECTS,
970      requested = count_raw,
971      "mediadecode-ffmpeg: AVSubtitle.num_rects exceeds rect cap; truncating",
972    );
973  }
974  let mut text_total_bytes: usize = 0;
975  let mut bitmap_total_bytes: usize = 0;
976
977  let text_kind = AVSubtitleType::SUBTITLE_TEXT as i32;
978  let ass_kind = AVSubtitleType::SUBTITLE_ASS as i32;
979  let bitmap_kind = AVSubtitleType::SUBTITLE_BITMAP as i32;
980  for i in 0..count {
981    // SAFETY: rects_ptr is non-null (checked above) and points to
982    // num_rects valid `*mut AVSubtitleRect` entries per FFmpeg's
983    // contract; `i < count == num_rects`, so the offset is in-bounds.
984    let rect_ptr = unsafe { *rects_ptr.add(i) };
985    if rect_ptr.is_null() {
986      continue;
987    }
988    // Read `type_` raw — avoid forming `&AVSubtitleRect` (which
989    // would require type_ to be a valid AVSubtitleType variant).
990    // SAFETY: `rect_ptr` is a live `*mut AVSubtitleRect`; `addr_of!`
991    // computes the field address without forming a reference;
992    // reading as `i32` matches the bindgen enum's `c_int` storage.
993    let rect_type_raw = unsafe { read_unaligned(addr_of!((*rect_ptr).type_) as *const i32) };
994    // Pre-read primitive fields we'll use later (no `&AVSubtitleRect`
995    // ever formed).
996    let rect_text_ptr = unsafe { (*rect_ptr).text };
997    let rect_ass_ptr = unsafe { (*rect_ptr).ass };
998    let rect_data0_ptr = unsafe { (*rect_ptr).data[0] };
999    let rect_data1_ptr = unsafe { (*rect_ptr).data[1] };
1000    let rect_linesize0 = unsafe { (*rect_ptr).linesize[0] };
1001    let rect_w = unsafe { (*rect_ptr).w };
1002    let rect_h = unsafe { (*rect_ptr).h };
1003    let rect_x = unsafe { (*rect_ptr).x };
1004    let rect_y = unsafe { (*rect_ptr).y };
1005
1006    match rect_type_raw {
1007      x if x == text_kind && !rect_text_ptr.is_null() => {
1008        // SAFETY: `text` is documented as a 0-terminated UTF-8
1009        // string, owned by FFmpeg for the lifetime of the AVSubtitle.
1010        // We use a *bounded* NUL search instead of `CStr::from_ptr`
1011        // — the latter walks until it finds a NUL, which a valid-
1012        // but-pathological string makes unbounded, and a missing
1013        // NUL violates the `CStr::from_ptr` precondition outright.
1014        // `bounded_cstr_bytes` searches at most
1015        // `SUBTITLE_MAX_TEXT_BYTES_PER_RECT + 1` bytes; if no NUL
1016        // is found inside that window the rect is rejected.
1017        let bytes = unsafe { bounded_cstr_bytes(rect_text_ptr, SUBTITLE_MAX_TEXT_BYTES_PER_RECT) }
1018          .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })?;
1019        // The cap is now enforced inside `bounded_cstr_bytes` (no
1020        // NUL within `cap + 1` ⇒ rejection); a redundant length
1021        // check is unnecessary but kept as documentation.
1022        if bytes.len() > SUBTITLE_MAX_TEXT_BYTES_PER_RECT {
1023          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1024        }
1025        let separator = if text_chunks.is_empty() { 0 } else { 1 };
1026        let projected = text_total_bytes
1027          .saturating_add(bytes.len())
1028          .saturating_add(separator);
1029        if projected > SUBTITLE_MAX_TEXT_TOTAL_BYTES {
1030          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1031        }
1032        if separator == 1 {
1033          text_chunks.push(b'\n');
1034        }
1035        text_chunks.extend_from_slice(bytes);
1036        text_total_bytes = projected;
1037      }
1038      x if x == ass_kind && !rect_ass_ptr.is_null() => {
1039        // SAFETY: `ass` is documented as 0-terminated UTF-8.
1040        // Same bounded-scan rationale as the TEXT branch above.
1041        let bytes = unsafe { bounded_cstr_bytes(rect_ass_ptr, SUBTITLE_MAX_TEXT_BYTES_PER_RECT) }
1042          .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })?;
1043        if bytes.len() > SUBTITLE_MAX_TEXT_BYTES_PER_RECT {
1044          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1045        }
1046        let separator = if text_chunks.is_empty() { 0 } else { 1 };
1047        let projected = text_total_bytes
1048          .saturating_add(bytes.len())
1049          .saturating_add(separator);
1050        if projected > SUBTITLE_MAX_TEXT_TOTAL_BYTES {
1051          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1052        }
1053        if separator == 1 {
1054          text_chunks.push(b'\n');
1055        }
1056        text_chunks.extend_from_slice(bytes);
1057        text_total_bytes = projected;
1058      }
1059      x if x == bitmap_kind => {
1060        // Bitmap region. data[0] = paletted indices, data[1] = RGBA
1061        // palette (256 entries × 4 bytes = 1024 bytes). Both are
1062        // owned by FFmpeg and not refcounted; copy into fresh buffers.
1063        let w = rect_w.max(0) as u32;
1064        let h = rect_h.max(0) as u32;
1065        let stride = rect_linesize0.max(0) as u32;
1066        if rect_data0_ptr.is_null() || stride == 0 || h == 0 {
1067          continue;
1068        }
1069        // `checked_mul` so a corrupt rect can't drive
1070        // `from_raw_parts` to an address-space-spanning length (UB
1071        // even before any deref).
1072        let data_len = (stride as usize)
1073          .checked_mul(h as usize)
1074          .ok_or(ConvertError::InvalidPlaneLayout { plane: 0 })?;
1075        // Per-rect bitmap byte cap (defends against a single
1076        // attacker rect larger than realistic DVB / PGS subtitles
1077        // by a wide margin).
1078        if data_len > SUBTITLE_MAX_BITMAP_BYTES_PER_RECT {
1079          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1080        }
1081        let projected_total = bitmap_total_bytes.saturating_add(data_len);
1082        if projected_total > SUBTITLE_MAX_BITMAP_TOTAL_BYTES {
1083          return Err(ConvertError::InvalidPlaneLayout { plane: 0 });
1084        }
1085        // SAFETY: data[0] is valid for `linesize[0] * h` bytes per
1086        // FFmpeg's contract; the multiplication is checked above.
1087        let data_slice = unsafe { core::slice::from_raw_parts(rect_data0_ptr, data_len) };
1088        let data_buf = FfmpegBuffer::copy_from_slice(data_slice)
1089          .ok_or(ConvertError::BufferAcquireFailed { plane: 0 })?;
1090        let palette_len = 256 * 4;
1091        let palette_buf = if rect_data1_ptr.is_null() {
1092          FfmpegBuffer::copy_from_slice(&[])
1093            .ok_or(ConvertError::BufferAcquireFailed { plane: 1 })?
1094        } else {
1095          // SAFETY: palette buffer is 256*4 bytes per FFmpeg's contract.
1096          let p = unsafe { core::slice::from_raw_parts(rect_data1_ptr, palette_len) };
1097          FfmpegBuffer::copy_from_slice(p).ok_or(ConvertError::BufferAcquireFailed { plane: 1 })?
1098        };
1099        bitmap_regions.push(mediadecode::subtitle::BitmapRegion::new(
1100          rect_x.max(0) as u32,
1101          rect_y.max(0) as u32,
1102          w,
1103          h,
1104          stride,
1105          data_buf,
1106          palette_buf,
1107        ));
1108        bitmap_total_bytes = projected_total;
1109      }
1110      _ => {}
1111    }
1112  }
1113
1114  let payload = if !text_chunks.is_empty() {
1115    let buf = FfmpegBuffer::copy_from_slice(&text_chunks)
1116      .ok_or(ConvertError::BufferAcquireFailed { plane: 0 })?;
1117    SubtitlePayload::Text {
1118      text: buf,
1119      language: None,
1120    }
1121  } else if !bitmap_regions.is_empty() {
1122    SubtitlePayload::Bitmap {
1123      regions: bitmap_regions,
1124    }
1125  } else {
1126    // No rects (or only `None`-typed) — empty text payload.
1127    let buf =
1128      FfmpegBuffer::copy_from_slice(&[]).ok_or(ConvertError::BufferAcquireFailed { plane: 0 })?;
1129    SubtitlePayload::Text {
1130      text: buf,
1131      language: None,
1132    }
1133  };
1134
1135  let sub_pts = unsafe { (*av_subtitle).pts };
1136  let pts = if sub_pts != AV_NOPTS_VALUE {
1137    Some(Timestamp::new(sub_pts, time_base))
1138  } else {
1139    None
1140  };
1141
1142  let extra = SubtitleFrameExtra::new(unsafe { (*av_subtitle).start_display_time }, unsafe {
1143    (*av_subtitle).end_display_time
1144  });
1145
1146  Ok(SubtitleFrame::new(payload, extra).with_pts(pts))
1147}
1148
1149fn map_picture_type_raw(raw: i32) -> PictureType {
1150  match raw {
1151    x if x == AVPictureType::AV_PICTURE_TYPE_I as i32 => PictureType::I,
1152    x if x == AVPictureType::AV_PICTURE_TYPE_P as i32 => PictureType::P,
1153    x if x == AVPictureType::AV_PICTURE_TYPE_B as i32 => PictureType::B,
1154    x if x == AVPictureType::AV_PICTURE_TYPE_S as i32 => PictureType::S,
1155    x if x == AVPictureType::AV_PICTURE_TYPE_SI as i32 => PictureType::Si,
1156    x if x == AVPictureType::AV_PICTURE_TYPE_SP as i32 => PictureType::Sp,
1157    x if x == AVPictureType::AV_PICTURE_TYPE_BI as i32 => PictureType::Bi,
1158    _ => PictureType::Unspecified,
1159  }
1160}