Skip to main content

codec/decode/
mod.rs

1//! GPU-only decode dispatch.
2//!
3//! Per the 2026-05-08 directive: every CPU decoder (openh264, libde265,
4//! libvpx, rav1d, libmpeg2, libxvidcore, pure-Rust ProRes) was deleted
5//! along with the legacy `FallbackDecoder` GPU→CPU fallover. The
6//! production binary supports exactly two backends:
7//!
8//!   - NVDEC (NVIDIA, via libnvcuvid)
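//!   - QSV   (Intel,  via libvpl + iHD)
//!
//! Builds with the `amd` feature additionally compile an AMF (AMD) decode
//! path, which [`create_decoder_on`] tries after NVDEC and before QSV.
//!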
11//! Hosts without one of those (no NVIDIA, no Intel Arc / Meteor Lake,
12//! or a codec the local GPU can't decode) hard-fail at
13//! [`create_decoder`]. There is no CPU decode path of any shape.
14
15#[cfg(feature = "amd")]
16pub mod amf_dec;
17#[cfg(feature = "ffmpeg")]
18pub mod ffmpeg;
19#[cfg(feature = "nvidia")]
20pub mod nvdec;
21#[cfg(feature = "qsv")]
22pub mod qsv_dec;
23
24use crate::frame::{StreamInfo, VideoFrame};
25use crate::gpu;
26
27/// Deinterleave an NV12 frame (Y plane + interleaved UV plane, each with its
28/// own row stride) into a tightly-packed `Yuv420p` buffer (Y, then U, then V).
29/// A shared NV12 deinterleave helper for the GPU decode paths.
30#[cfg(any(feature = "nvidia", feature = "amd", feature = "qsv"))]
31#[allow(dead_code)]
/// Chroma planes are `width.div_ceil(2)` x `height.div_ceil(2)` samples — the
/// same rounding the P010 helper in this module uses — so odd-sized frames
/// keep their last chroma column/row instead of silently dropping it.
///
/// # Panics
///
/// Panics if `y` holds fewer than `height` rows of `width` bytes at
/// `y_stride`, or if `uv` holds fewer than `height.div_ceil(2)` rows of
/// `2 * width.div_ceil(2)` bytes at `uv_stride`.
pub(crate) fn nv12_planes_to_yuv420p(
    y: &[u8],
    y_stride: usize,
    uv: &[u8],
    uv_stride: usize,
    width: usize,
    height: usize,
) -> Vec<u8> {
    // 4:2:0 chroma is half resolution, rounded UP for odd luma dimensions.
    let cw = width.div_ceil(2);
    let ch = height.div_ceil(2);
    let chroma_len = cw * ch;

    let mut out = Vec::with_capacity(width * height + 2 * chroma_len);

    // Luma: copy each row, dropping any stride padding.
    for row in 0..height {
        let off = row * y_stride;
        out.extend_from_slice(&y[off..off + width]);
    }

    // Reserve the U and V planes up front and fill both in a single pass over
    // the interleaved source, avoiding temporary per-plane buffers.
    let chroma_start = out.len();
    out.resize(chroma_start + 2 * chroma_len, 0);
    let (u_plane, v_plane) = out[chroma_start..].split_at_mut(chroma_len);

    // `chunks_exact_mut(0)` panics, and a zero-width frame has no chroma.
    if cw > 0 {
        let rows = u_plane
            .chunks_exact_mut(cw)
            .zip(v_plane.chunks_exact_mut(cw))
            .enumerate();
        for (row, (u_row, v_row)) in rows {
            let off = row * uv_stride;
            let pairs = uv[off..off + 2 * cw].chunks_exact(2);
            for ((u, v), pair) in u_row.iter_mut().zip(v_row.iter_mut()).zip(pairs) {
                *u = pair[0];
                *v = pair[1];
            }
        }
    }
    out
}
62
63/// Deinterleave host **P010** planes (Y `u16` + interleaved UV `u16`, 10-bit in
64/// the HIGH bits) into a packed `Yuv420p10le` buffer (Y, U, V planar, 10-bit in
65/// the LOW bits — `>> 6`). Shared by the AMD/Intel GPU decode paths.
66#[cfg(any(feature = "amd", feature = "qsv"))]
67#[allow(dead_code)]
/// `y_stride` / `uv_stride` are byte pitches, and every sample is read as a
/// little-endian `u16` from the byte slices. A sample whose two bytes are not
/// both inside the supplied slice is emitted as `0` rather than panicking.
/// Chroma planes are `width.div_ceil(2)` x `height.div_ceil(2)` samples.
pub(crate) fn p010_planes_to_yuv420p10le(
    y: &[u8],
    y_stride: usize,
    uv: &[u8],
    uv_stride: usize,
    width: usize,
    height: usize,
) -> Vec<u8> {
    // Fetch the little-endian u16 at byte offset `at` and move the 10
    // significant bits from the top of the word to the bottom. Reads past the
    // end of `plane` yield 0.
    fn sample10(plane: &[u8], at: usize) -> u16 {
        match plane.get(at..=at + 1) {
            Some(&[lo, hi]) => u16::from_le_bytes([lo, hi]) >> 6,
            _ => 0,
        }
    }

    let chroma_w = width.div_ceil(2);
    let chroma_h = height.div_ceil(2);
    let mut packed: Vec<u8> =
        Vec::with_capacity(2 * (width * height + 2 * chroma_w * chroma_h));

    // Luma plane: one 2-byte sample per pixel.
    for line in 0..height {
        let start = line * y_stride;
        for x in 0..width {
            packed.extend_from_slice(&sample10(y, start + x * 2).to_le_bytes());
        }
    }

    // Chroma: each interleaved pair is 4 bytes (U at +0, V at +2). Emit the
    // whole U plane first, then the whole V plane.
    for component in [0usize, 2] {
        for line in 0..chroma_h {
            let start = line * uv_stride;
            for x in 0..chroma_w {
                packed.extend_from_slice(
                    &sample10(uv, start + x * 4 + component).to_le_bytes(),
                );
            }
        }
    }
    packed
}
106use anyhow::{Result, bail};
107
/// Push/pull streaming decoder interface shared by the hardware backends.
///
/// Compressed samples go in through [`Decoder::push_sample`], end-of-stream
/// is signalled with [`Decoder::finish`], and decoded frames come out of
/// [`Decoder::decode_next`]. Implementors must be `Send`.
pub trait Decoder: Send {
    /// Borrow the [`StreamInfo`] associated with this decoder.
    fn stream_info(&self) -> &StreamInfo;

    /// Feed one Annex-B (or codec-native — AV1 OBU, VP9 superframe) sample
    /// into the decoder. Implementations may buffer internally until
    /// `finish` is called or may decode eagerly and buffer produced
    /// frames. Pull frames via `decode_next` at any point.
    fn push_sample(&mut self, data: &[u8]) -> Result<()>;

    /// Signal end-of-stream. After this, no more `push_sample` calls;
    /// `decode_next` drains remaining frames.
    fn finish(&mut self) -> Result<()>;

    /// Pull the next decoded frame, if any.
    ///
    /// NOTE(review): `Ok(None)` presumably means "no frame available right
    /// now" before `finish` and "fully drained" after it — confirm against
    /// the backend implementations.
    fn decode_next(&mut self) -> Result<Option<VideoFrame>>;
}
123
124/// Truthy-string parse for env-var opt-outs. `1` / `true` / `yes` / `on`
125/// / `y` / `t` (case-insensitive) all resolve true; anything else is
126/// false. Mirrors the encode-side helper for symmetry.
127#[cfg(feature = "nvidia")]
/// A variable that is unset, or whose value `std::env::var` cannot return,
/// counts as false.
fn env_flag_truthy(name: &str) -> bool {
    // Accepted spellings, compared ASCII-case-insensitively against the raw
    // value (no trimming).
    const TRUTHY: [&str; 6] = ["1", "true", "yes", "on", "y", "t"];
    std::env::var(name).is_ok_and(|value| {
        TRUTHY
            .iter()
            .any(|accepted| value.eq_ignore_ascii_case(accepted))
    })
}
137
/// Env-var opt-out check for NVDEC, global or per codec.
///
/// `DISABLE_NVDEC=1` blocks every codec; `DISABLE_NVDEC_<CODEC>=1` (for
/// example `DISABLE_NVDEC_H264=1`) blocks just one, mirroring the
/// previous-stack granular knob. Intended as a debugging escape hatch when
/// a specific codec/driver combo is misbehaving on the active host (e.g.
/// Blackwell + 4K H.264 silent-stall).
///
/// `codec_lower` is matched as-is against lowercase aliases; a codec with
/// no known alias can only be disabled through the global switch.
#[cfg(feature = "nvidia")]
fn nvdec_disabled_for(codec_lower: &str) -> bool {
    // Map every accepted alias onto the suffix used in the env-var name.
    let suffix = match codec_lower {
        "h264" | "avc1" | "avc" => Some("H264"),
        "h265" | "hevc" | "hvc1" | "hev1" | "hvc2" | "hev2" => Some("HEVC"),
        "vp8" => Some("VP8"),
        "vp9" | "vp09" => Some("VP9"),
        "av1" | "av01" => Some("AV1"),
        "mpeg2" | "mpeg2video" => Some("MPEG2"),
        "mpeg4" | "mp4v" => Some("MPEG4"),
        _ => None,
    };
    // The global switch is consulted first; the per-codec one only if it is off.
    env_flag_truthy("DISABLE_NVDEC")
        || suffix.is_some_and(|tag| env_flag_truthy(&format!("DISABLE_NVDEC_{tag}")))
}
160
161/// Codecs the NVDEC streaming dispatch supports.
162#[cfg(feature = "nvidia")]
/// Returns `true` when `codec_lower` exactly equals one of the lowercase
/// codec names / FourCC-style aliases listed below.
fn nvdec_supports(codec_lower: &str) -> bool {
    const NVDEC_CODECS: [&str; 18] = [
        // H.264
        "h264",
        "avc1",
        "avc",
        // H.265 / HEVC
        "h265",
        "hevc",
        "hvc1",
        "hev1",
        "hvc2",
        "hev2",
        // VP8 / VP9
        "vp8",
        "vp9",
        "vp09",
        // AV1
        "av1",
        "av01",
        // MPEG-2 / MPEG-4 Part 2
        "mpeg2",
        "mpeg2video",
        "mpeg4",
        "mp4v",
    ];
    NVDEC_CODECS.iter().any(|&known| known == codec_lower)
}
186
/// Names of the decode backends compiled into this build, in
/// dispatch-preference order (`ffmpeg`, `nvdec`, `amf`, `qsv`).
///
/// A backend is listed purely because its cargo feature is enabled; no
/// hardware is probed here.
pub fn decode_backends() -> Vec<&'static str> {
    // (feature enabled at compile time, reported backend name)
    const BACKENDS: [(bool, &str); 4] = [
        (cfg!(feature = "ffmpeg"), "ffmpeg"),
        (cfg!(feature = "nvidia"), "nvdec"),
        (cfg!(feature = "amd"), "amf"),
        (cfg!(feature = "qsv"), "qsv"),
    ];
    BACKENDS
        .iter()
        .filter(|entry| entry.0)
        .map(|entry| entry.1)
        .collect()
}
204
/// One codec's decode support across the compiled backends.
///
/// One entry per codec is produced by [`decode_capabilities`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeSupport {
    /// Canonical codec label, e.g. `"h264"`.
    pub codec: &'static str,
    /// Backend names that can decode it in this build (`"nvdec"`, `"amf"`,
    /// `"qsv"`, `"ffmpeg"`). Empty = this build can't decode it.
    pub backends: Vec<&'static str>,
}
214
215/// Which compiled backends decode each common codec, for `rivet capabilities`.
216pub fn decode_capabilities() -> Vec<DecodeSupport> {
217    const CODECS: &[&str] = &[
218        "h264", "hevc", "vp8", "vp9", "av1", "mpeg2", "mpeg4", "prores",
219    ];
220    // ffmpeg's software/hwaccel catalogue covers all of these.
221    const FFMPEG: &[&str] = CODECS;
222    CODECS
223        .iter()
224        .map(|&codec| {
225            let mut backends: Vec<&'static str> = Vec::new();
226            #[cfg(feature = "ffmpeg")]
227            if FFMPEG.contains(&codec) {
228                backends.push("ffmpeg");
229            }
230            #[cfg(feature = "nvidia")]
231            if nvdec_supports(codec) {
232                backends.push("nvdec");
233            }
234            #[cfg(feature = "amd")]
235            if amf_dec::supports(codec) {
236                backends.push("amf");
237            }
238            // QSV: ask the driver what this host's silicon can actually decode
239            // (MFXVideoDECODE_Query), not just what the build handles — so the
240            // report reflects the real adapter (e.g. an older iGPU without AV1
241            // decode). Probed once + cached; empty on a non-Intel host.
242            #[cfg(feature = "qsv")]
243            if qsv_dec::probe_decode_caps().contains(&codec) {
244                backends.push("qsv");
245            }
246            let _ = FFMPEG;
247            DecodeSupport { codec, backends }
248        })
249        .collect()
250}
251
/// Construct a hardware decoder for `codec` without pinning a GPU.
///
/// Thin wrapper over [`create_decoder_on`] with `gpu_index = None`.
/// Backends are tried in the order NVDEC → AMF → QSV, each only when its
/// cargo feature is compiled in. NVIDIA GPUs win on tie when several
/// vendors are present (NVDEC is generally lower-latency on the standard
/// codec set + is what the production fleet has been tuned against).
/// When NVDEC is disabled per env-var or doesn't support the codec,
/// dispatch falls through to AMF and then QSV. If none fits, hard-fail
/// — there is no CPU fallback.
pub fn create_decoder(codec: &str, info: StreamInfo) -> Result<Box<dyn Decoder>> {
    create_decoder_on(codec, info, None)
}
261
262/// Construct a decoder pinned to a specific `gpu_index` when one is
263/// supplied. `None` preserves the legacy "pick the first matching
264/// adapter" behaviour for one-shot callers (thumbnails, tests, benches)
265/// that don't care about distributing work across physical GPUs.
266///
267/// The pipeline's per-rung decode pumps should ALWAYS pass `Some(idx)`
268/// so each rung's decode session lands on a distinct adapter — without
269/// this, every QSV session piles onto the first physical Intel card
270/// regardless of what the GPU pool's lease said. See the project memo
271/// on QSV multi-adapter session pinning.
272pub fn create_decoder_on(
273    codec: &str,
274    info: StreamInfo,
275    gpu_index: Option<u32>,
276) -> Result<Box<dyn Decoder>> {
277    let codec_lower = codec.to_ascii_lowercase();
278    let gpus = gpu::detect_gpus();
279
280    // Pick the device. If the caller specified gpu_index, honour it
281    // (matching against `g.index`). Otherwise fall back to the first
282    // of each vendor — the legacy behaviour for callers that don't
283    // care about pinning.
284    #[cfg(feature = "nvidia")]
285    let nvidia = match gpu_index {
286        Some(idx) => gpus
287            .iter()
288            .find(|g| matches!(g.vendor, gpu::GpuVendor::Nvidia) && g.index == idx),
289        None => gpus
290            .iter()
291            .find(|g| matches!(g.vendor, gpu::GpuVendor::Nvidia)),
292    };
293
294    // NVIDIA / NVDEC first — our hand-rolled CUVID FFI (`nvidia` feature). One
295    // portable decoder for everything NVDEC handles: H.264/HEVC/AV1/VP8/VP9,
296    // MPEG-2/MPEG-4 Part 2, and 10-bit P016.
297    #[cfg(feature = "nvidia")]
298    if let Some(dev) = nvidia
299        && nvdec_supports(&codec_lower)
300        && !nvdec_disabled_for(&codec_lower)
301    {
302        tracing::info!(
303            backend = "nvdec",
304            codec = %codec_lower,
305            gpu_index = dev.index,
306            gpu_name = %dev.name,
307            "NVDEC decoder engaged (hand-rolled CUVID FFI)"
308        );
309        return Ok(nvdec::NvdecDecoder::new(info, dev.index));
310    }
311
312    // AMD / AMF hardware decode — hand-rolled AMF FFI (`amd` feature).
313    #[cfg(feature = "amd")]
314    {
315        let amd = match gpu_index {
316            Some(idx) => gpus
317                .iter()
318                .find(|g| matches!(g.vendor, gpu::GpuVendor::Amd) && g.index == idx),
319            None => gpus.iter().find(|g| matches!(g.vendor, gpu::GpuVendor::Amd)),
320        };
321        if let Some(dev) = amd
322            && amf_dec::supports(&codec_lower)
323        {
324            tracing::info!(
325                backend = "amf",
326                codec = %codec_lower,
327                gpu_index = dev.index,
328                gpu_name = %dev.name,
329                "AMF decoder engaged (hand-rolled AMF FFI)"
330            );
331            return Ok(Box::new(amf_dec::AmfDecoder::new(info, dev.index)?));
332        }
333    }
334
335    // Intel / QSV hardware decode — hand-rolled oneVPL FFI (`qsv` feature).
336    #[cfg(feature = "qsv")]
337    {
338        let intel = match gpu_index {
339            Some(idx) => gpus
340                .iter()
341                .find(|g| matches!(g.vendor, gpu::GpuVendor::Intel) && g.index == idx),
342            None => gpus.iter().find(|g| matches!(g.vendor, gpu::GpuVendor::Intel)),
343        };
344        if let Some(dev) = intel
345            && qsv_dec::supports(&codec_lower)
346        {
347            tracing::info!(
348                backend = "qsv",
349                codec = %codec_lower,
350                gpu_index = dev.index,
351                gpu_name = %dev.name,
352                "QSV decoder engaged (hand-rolled oneVPL FFI)"
353            );
354            return Ok(Box::new(qsv_dec::QsvDecoder::new(info, dev.index)?));
355        }
356    }
357
358    bail!(
359        "no GPU decoder available for codec '{}' on this host \
360         (NVIDIA GPUs cover h264/h265/vp8/vp9/av1/mpeg2/mpeg4; \
361          Intel Arc/Meteor Lake+ covers h264/h265/vp9/av1). \
362         CPU decoders were removed per the GPU-only directive.",
363        codec_lower
364    )
365}