rivet-codec 0.2.0

GPU video decode/encode dispatch (NVDEC/NVENC, AMF, QSV) plus colorspace, tonemap, audio, and probe for the rivet transcoder. Imported as `codec`.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
//! CUVID / CUDA FFI struct mirrors, function-pointer type aliases,
//! compile-time size assertions, and codec/surface/packet constants.
//!
//! Everything here is `pub` so sibling submodules within `nvdec/` can
//! reach items as `super::ffi::TypeName`. The `ffi` module itself is
//! private (declared without `pub` in `mod.rs`), so nothing leaks to
//! external callers.

use std::ffi::c_void;
use std::os::raw::{c_int, c_uchar, c_uint, c_ulong, c_ulonglong};

// ─── CUDA Driver API FFI ───────────────────────────────────────────
/// Status code returned by every driver and CUVID entry point declared in
/// this module.
pub type CUresult = c_int;
/// Device handle; written through the out-pointer of [`FnCuDeviceGet`] and
/// consumed by [`FnCuCtxCreate`].
pub type CUdevice = c_int;
/// Opaque context handle (untyped pointer).
pub type CUcontext = *mut c_void;
/// Device address carried as a `c_ulonglong` integer rather than a host
/// pointer type.
pub type CUdeviceptr = c_ulonglong;

// Function-pointer signatures for the CUDA driver entry points. Each alias is
// named after the symbol it is expected to be bound to (`FnCuInit` → `cuInit`,
// and so on); the symbol lookup itself is not part of this file.
// NOTE(review): whether the unsuffixed or `_v2` driver symbols are resolved is
// decided by the loader code — confirm it matches these signatures.
pub type FnCuInit = unsafe extern "C" fn(c_uint) -> CUresult;
pub type FnCuDeviceGet = unsafe extern "C" fn(*mut CUdevice, c_int) -> CUresult;
pub type FnCuCtxCreate = unsafe extern "C" fn(*mut CUcontext, c_uint, CUdevice) -> CUresult;
pub type FnCuCtxDestroy = unsafe extern "C" fn(CUcontext) -> CUresult;
pub type FnCuCtxPushCurrent = unsafe extern "C" fn(CUcontext) -> CUresult;
pub type FnCuCtxPopCurrent = unsafe extern "C" fn(*mut CUcontext) -> CUresult;
/// Takes a read-only pointer to a [`CudaMemcpy2D`] copy descriptor.
pub type FnCuMemcpy2D = unsafe extern "C" fn(*const CudaMemcpy2D) -> CUresult;

// Memory-kind tags. Presumably the values stored in
// `CudaMemcpy2D::src_memory_type` / `dst_memory_type` (both are `c_uint`) —
// confirm against the CUDA `CUmemorytype` enum.
pub const CU_MEMORYTYPE_HOST: c_uint = 1;
pub const CU_MEMORYTYPE_DEVICE: c_uint = 2;

/// 2-D copy descriptor handed by pointer to [`FnCuMemcpy2D`].
///
/// The field set follows the CUDA driver API's `CUDA_MEMCPY2D`: a source
/// half (`src_*`), a destination half (`dst_*`), then the extent of the copy.
/// Each half carries a memory-type tag plus one slot per possible backing
/// store (host pointer, device address, array handle).
///
/// NOTE(review): unlike the CUVID mirrors in this file, this struct has no
/// compile-time size assertion — confirm the layout against the CUDA header
/// if it is ever edited.
#[repr(C)]
pub struct CudaMemcpy2D {
    // ── source ──
    pub src_x_in_bytes: usize,
    pub src_y: usize,
    // Presumably one of the CU_MEMORYTYPE_* tags — verify against callers.
    pub src_memory_type: c_uint,
    pub src_host: *const c_void,
    pub src_device: CUdeviceptr,
    pub src_array: *const c_void,
    pub src_pitch: usize,
    // ── destination ──
    pub dst_x_in_bytes: usize,
    pub dst_y: usize,
    // Presumably one of the CU_MEMORYTYPE_* tags — verify against callers.
    pub dst_memory_type: c_uint,
    pub dst_host: *mut c_void,
    pub dst_device: CUdeviceptr,
    pub dst_array: *const c_void,
    pub dst_pitch: usize,
    // ── extent ──
    pub width_in_bytes: usize,
    pub height: usize,
}

// ─── CUVID (Video Decoder) FFI ─────────────────────────────────────
/// Opaque parser handle: produced through the out-pointer of
/// `FnCuvidCreateVideoParser` and passed by value to the parse/destroy calls.
pub type CUvideoparser = *mut c_void;
/// Opaque decoder handle: produced through the out-pointer of
/// `FnCuvidCreateDecoder` and passed by value to the decode/map/unmap/destroy
/// calls.
pub type CUvideodecoder = *mut c_void;

/// Mirrors CUVIDEOFORMAT from SDK 12.2. Layout padded out with an
/// explicit reserved tail so the driver can write trailing fields
/// we don't read without corrupting adjacent memory. Only the fields
/// we actually consume in sequence_callback are named.
///
/// A pointer to this struct is the second argument of
/// `CuVideoParserParams::pfn_sequence_callback`.
///
/// NOTE(review): `video_signal_description` is declared as 8 bytes here, but
/// the size-assertion commentary further down this file states the field is
/// 4 bytes in the real SDK. If that is right, `seqhdr_data_length` sits 4
/// bytes later in this mirror than in the driver's struct and must not be
/// trusted until the width is reconciled. The fields before
/// `video_signal_description` are unaffected.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct CuVideoFormat {
    pub codec: c_int,
    pub frame_rate_num: c_uint,
    pub frame_rate_den: c_uint,
    pub progressive_sequence: u8,
    pub bit_depth_luma_minus8: u8,
    pub bit_depth_chroma_minus8: u8,
    pub min_num_decode_surfaces: u8,
    pub coded_width: c_uint,
    pub coded_height: c_uint,
    pub display_area_left: c_int,
    pub display_area_top: c_int,
    pub display_area_right: c_int,
    pub display_area_bottom: c_int,
    pub chroma_format: c_int,
    pub bitrate: c_uint,
    pub display_aspect_num: c_int,
    pub display_aspect_den: c_int,
    // Raw bytes of the SDK's packed colour-description sub-struct; see the
    // NOTE(review) on the struct about its width.
    pub video_signal_description: [u8; 8],
    pub seqhdr_data_length: c_uint,
    // Reserved tail for HDR metadata + codec-specific format info the
    // driver writes in SDK 12.x. Size chosen to comfortably exceed the
    // real struct size (reported ~1 KB for AV1 sequence headers).
    pub _reserved_tail: [u8; 1024],
}

/// Layout matches CUVIDPARSERPARAMS from nv-codec-headers
/// (FFmpeg/nv-codec-headers/include/ffnvcodec/dynlink_nvcuvid.h).
///
/// Authoritative field breakdown after max_display_delay:
///   - `bAnnexb:1 | bMemoryOptimize:1 | uReserved:30` — 1 u32 bitfield
///   - `uReserved1[4]` — 4 more u32
///   - pUserData + 5 callback fn pointers
///   - `pvReserved2[5]` — 5 void pointers
///   - pExtVideoInfo
///
/// The earlier 80-byte stub (single `reserved: c_uint`) placed callbacks
/// where the driver expected reserved zero bytes — segfault on long
/// streams, zero frames on short ones. Current size matches the SDK
/// within Rust's layout rules: 10 × u32 (40) + 12 × pointer (96) = 136
/// bytes on 64-bit targets, which is the value the compile-time size
/// assertion in this file checks.
#[repr(C)]
pub struct CuVideoParserParams {
    pub codec_type: c_int,
    pub max_num_decode_surfaces: c_uint,
    pub clock_rate: c_uint,
    pub error_threshold: c_uint,
    pub max_display_delay: c_uint,
    /// Bitfield word (bAnnexb | bMemoryOptimize | uReserved:30) + 4 reserved u32.
    /// We zero-init and never set any bits; SDK layout compatible.
    pub reserved1: [c_uint; 5],
    /// Opaque pointer; the first `*mut c_void` parameter of each callback
    /// below is presumably this value handed back by the parser — confirm
    /// against the callback implementations.
    pub user_data: *mut c_void,
    // `Option<fn>` is pointer-sized, with `None` represented as a null
    // function pointer, so an unset callback occupies the same 8 bytes.
    pub pfn_sequence_callback: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoFormat) -> c_int>,
    pub pfn_decode_picture: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoPicParams) -> c_int>,
    pub pfn_display_picture: Option<unsafe extern "C" fn(*mut c_void, *mut CuVideoDispInfo) -> c_int>,
    pub pfn_get_operating_point: Option<unsafe extern "C" fn(*mut c_void, *mut c_void) -> c_int>,
    pub pfn_get_sei_msg: Option<unsafe extern "C" fn(*mut c_void, *mut c_void) -> c_int>,
    /// SDK: `void *pvReserved2[5]`.
    pub reserved2: [*mut c_void; 5],
    pub ext_video_info: *mut c_void,
}

/// Mirror of CUVIDSOURCEDATAPACKET — the bitstream packet passed by pointer
/// to `FnCuvidParseVideoData`.
///
/// `flags` and `payload_size` are `c_ulong`, so the struct is 24 bytes where
/// `c_ulong` is 4 bytes (Windows x64) and 32 bytes where it is 8 (Linux
/// x86_64); both sizes are asserted per platform in this file.
#[repr(C)]
pub struct CuVideoSourceDataPacket {
    // Presumably a combination of the CUVID_PKT_* constants (same `c_ulong`
    // type) — verify against callers.
    pub flags: c_ulong,
    pub payload_size: c_ulong,
    pub payload: *const u8,
    pub timestamp: c_ulonglong,
}

/// Mirror of CUVIDDECODECREATEINFO — the decoder configuration passed by
/// pointer to `FnCuvidCreateDecoder`.
///
/// Most integer fields are `c_ulong`, so the total size depends on the
/// platform's `long` width. Only the Windows layout (112 bytes) is asserted
/// in this file.
///
/// NOTE(review): `code_width` looks like a typo for `coded_width` (compare
/// `coded_height` right below it). The name is left alone because renaming a
/// public field would break the sibling modules that initialise it.
#[repr(C)]
pub struct CuVideoDecodeCreateInfo {
    pub code_width: c_ulong,
    pub coded_height: c_ulong,
    pub num_decode_surfaces: c_ulong,
    // Presumably one of the CUVID_* codec constants — verify against callers.
    pub codec_type: c_int,
    // Presumably CUVID_CHROMA_420 — verify against callers.
    pub chroma_format: c_int,
    // Presumably CUVID_CREATE_PREFER_CUVID (same `c_ulong` type) — verify.
    pub creation_flags: c_ulong,
    pub bit_depth_minus8: c_ulong,
    pub intra_decode_only: c_ulong,
    pub max_width: c_ulong,
    pub max_height: c_ulong,
    pub reserved1: c_ulong,
    // Four 16-bit values, 8 bytes in total.
    pub display_area_left: i16,
    pub display_area_top: i16,
    pub display_area_right: i16,
    pub display_area_bottom: i16,
    // Presumably CUVID_FMT_NV12 or CUVID_FMT_P016 — verify against callers.
    pub output_format: c_int,
    pub deinterlace_mode: c_int,
    pub target_width: c_ulong,
    pub target_height: c_ulong,
    pub num_output_surfaces: c_ulong,
    pub vid_lock: *mut c_void,
    // Four 16-bit values, 8 bytes in total.
    pub target_rect_left: i16,
    pub target_rect_top: i16,
    pub target_rect_right: i16,
    pub target_rect_bottom: i16,
    pub enable_histogram: c_ulong,
    pub reserved2: [c_ulong; 4],
}

/// Mirrors CUVIDPICPARAMS from SDK 12.2.
///
/// A pointer to this struct is the second argument of
/// `CuVideoParserParams::pfn_decode_picture` and of `FnCuvidDecodePicture`.
///
/// Critical — task #39 audit (2026-04-17): the REAL NVIDIA Video Codec SDK
/// 12.2 defines the trailing codec-specific region as a union whose byte
/// size is fixed by its `unsigned int CodecReserved[1024]` fallback
/// variant — that's **4096 bytes** (1024 × 4). All concrete codec
/// variants (CUVIDH264PICPARAMS, CUVIDHEVCPICPARAMS, CUVIDVP9PICPARAMS,
/// CUVIDAV1PICPARAMS, CUVIDVP8PICPARAMS, CUVIDMPEG2PICPARAMS,
/// CUVIDMPEG4PICPARAMS) fit within that 4 KiB envelope.
///
/// Note: the vendored stub at `vendor/nvidia/cuviddec.h` simplifies the
/// union to `unsigned char CodecSpecific[1024]` (1024 bytes) for
/// documentation purposes. That stub is NOT the ABI we call at runtime —
/// we dlopen the real driver binary which follows the 4096-byte layout.
/// A Rust buffer smaller than 4096 would be a driver-side write
/// overflow; larger is safe (driver writes only what it needs, we read
/// only the parsed callback-input fields before this struct, so the
/// trailing bytes are never examined).
///
/// Earlier revisions declared this as `[u8; 2048]` — half the correct
/// size. The driver overran it on H.264 Main profile (larger reference
/// lists + scaling matrices than Baseline) producing silent zero-frames
/// and the class of memory corruption that triggered task #39's
/// segfault hunt on Windows. H.264 High's different pic-params shape
/// happened to fit. Same root cause class as the CUVIDPARSERPARAMS
/// 80→136 fix (task #39/#52/#53).
#[repr(C)]
pub struct CuVideoPicParams {
    pub pic_width_in_mbs: c_int,
    pub pic_height_in_mbs: c_int,
    pub curr_pic_idx: c_int,
    pub field_pic_flag: c_int,
    pub bottom_field_flag: c_int,
    pub second_field: c_int,
    // A u32 followed by a pointer: 4 bytes of alignment padding follow on
    // 64-bit targets.
    pub n_bitstream_data_len: c_uint,
    pub p_bitstream_data: *const u8,
    // Same u32-then-pointer pattern, same 4 bytes of padding.
    pub n_num_slices: c_uint,
    pub p_slice_data_offsets: *const c_uint,
    pub ref_pic_flag: c_int,
    pub intra_pic_flag: c_int,
    pub reserved: [c_uint; 30],
    // Matches the REAL SDK `union { ...; unsigned int CodecReserved[1024]; }`
    // = 4096 bytes. The whole-struct size (4280 bytes) is asserted at
    // compile time elsewhere in this file.
    pub codec_specific: [c_uint; 1024],
}

/// Mirror of CUVIDPARSERDISPINFO — the per-picture display record whose
/// pointer is the second argument of
/// `CuVideoParserParams::pfn_display_picture`.
///
/// Four `c_int` fields followed by a 64-bit timestamp: 24 bytes, asserted at
/// compile time in this file.
#[repr(C)]
pub struct CuVideoDispInfo {
    pub picture_index: c_int,
    pub progressive_frame: c_int,
    pub top_field_first: c_int,
    pub repeat_first_field: c_int,
    // NOTE(review): declared unsigned here; confirm the signedness of the
    // SDK's timestamp typedef if negative timestamps ever matter.
    pub timestamp: c_ulonglong,
}

// ─── Codec-variant pic-params shape witnesses (Squad-12, task #39) ────
//
// The driver writes a codec-specific pic-params blob into our
// `CuVideoPicParams.codec_specific` array on every `pfn_decode_picture`
// callback. We treat the contents as opaque (the parser populates them
// before we hand the struct to `cuvidDecodePicture`), but the SHAPE
// matters: the union variant the driver picks must fit within the 4096
// byte `CodecReserved[1024]` envelope or it overruns our allocation.
//
// These structs mirror the per-codec field shape closely enough to
// produce a defensible upper-bound on their packed sizeof, which we
// then assert ≤ 4096 at compile time. They are NOT used at runtime —
// declared here so a future ABI drift (e.g. an extra DPB slot in
// CUVIDH264PICPARAMS, or a new HEVC scaling list dimension) trips the
// const_assert immediately rather than silently corrupting the parser
// state and reproducing task #39 on a different code path.
//
// Reference: nv-codec-headers 12.2 (FFmpeg/nv-codec-headers
// include/ffnvcodec/cuviddec.h) and the published doxygen at
// https://ffmpeg.org/doxygen/trunk/cuviddec_8h_source.html.

/// CUVIDH264DPBENTRY — one entry of the H.264 reference picture buffer.
/// Five scalar `c_int` fields (PicIdx, FrameIdx, is_long_term, not_existing,
/// used_for_reference) plus FieldOrderCnt[2] — seven `c_int` values in all
/// → 28 bytes wherever `c_int` is 4 bytes.
/// dpb[16] in CUVIDH264PICPARAMS → 448 bytes.
///
/// Compile-time witness only: nothing constructs this type at runtime.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoH264DpbEntry {
    pic_idx: c_int,
    frame_idx: c_int,
    is_long_term: c_int,
    not_existing: c_int,
    used_for_reference: c_int,
    field_order_cnt: [c_int; 2],
}
// 7 × 4 bytes, no padding.
const _: () = assert!(std::mem::size_of::<CuVideoH264DpbEntry>() == 28);
// dpb[16] block size — the segfault hunt called this out as "16 vs 17"
// — 17 was a bogus historical theory; the SDK has always been 16.
const _: () = assert!(std::mem::size_of::<[CuVideoH264DpbEntry; 16]>() == 448);

/// Upper-bound shape of CUVIDH264PICPARAMS. Concrete fields lifted from
/// nv-codec-headers 12.2; reserved tail padded out so even if the driver
/// adds a small block in a future SDK we still fit. Real SDK reportedly
/// ~1.9 KiB (not verifiable from this file).
///
/// The witness itself sums to 128 + 448 + 96 + 128 + 256 + 1024 = 2080
/// bytes (~2.0 KiB). An earlier comment quoted ~3.1 KiB, which does not
/// match the fields below.
///
/// NOTE(review): this is a hand-written approximation, so the assertion
/// bounds this struct, not the SDK's — it only catches drift if someone
/// updates these fields to follow the header.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoH264PicParamsShape {
    // SPS/PPS scalars — ~30 ints worth of flags + counters in the SDK.
    sps_pps_scalars: [c_int; 32],
    // The 16-entry DPB.
    dpb: [CuVideoH264DpbEntry; 16],
    // Quant matrices: WeightScale4x4[6][16] + WeightScale8x8[2][64].
    weight_scale_4x4: [[u8; 16]; 6],
    weight_scale_8x8: [[u8; 64]; 2],
    // FMO/ASO + slice_group_map (union of u64 + ptr) + MVC/SVC ext blob.
    fmo_aso_extras: [u8; 256],
    // Reserved tail to absorb future SDK additions without re-verifying.
    reserved_tail: [u8; 1024],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoH264PicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDHEVCPICPARAMS. SPS/PPS scalars + RPS arrays
/// (RefPicIdx[16] / PicOrderCntVal[16] / IsLongTerm[16] etc.) + scaling
/// lists. Real SDK reportedly ~1.2 KiB (not verifiable from this file).
///
/// The witness itself sums to 1680 bytes (~1.6 KiB): 256 + 64 + 64 + 16
/// + 24 of scalars and RPS data, 96 + 384 + 384 + 128 + 6 + 2 of scaling
/// lists, and a 256-byte tail. An earlier comment quoted ~2.5 KiB, which
/// does not match the fields below.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoHevcPicParamsShape {
    sps_pps_scalars: [c_int; 64],
    ref_pic_idx: [c_int; 16],
    pic_order_cnt_val: [c_int; 16],
    is_long_term: [c_uchar; 16],
    // RpsSetStCurrBefore/After/LtCurr — three 8-entry sets per the SDK.
    rps_sets: [[c_uchar; 8]; 3],
    // ScalingList4x4[6][16] + 8x8[6][64] + 16x16[6][64] + 32x32[2][64]
    // + ScalingListDCCoeff16x16[6] + 32x32[2].
    scaling_list_4x4: [[c_uchar; 16]; 6],
    scaling_list_8x8: [[c_uchar; 64]; 6],
    scaling_list_16x16: [[c_uchar; 64]; 6],
    scaling_list_32x32: [[c_uchar; 64]; 2],
    scaling_list_dc_16x16: [c_uchar; 6],
    scaling_list_dc_32x32: [c_uchar; 2],
    // Reserved tail.
    reserved_tail: [u8; 256],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoHevcPicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDAV1PICPARAMS — film grain table + tile
/// column/row arrays. The original note calls this the largest variant per
/// the SDK (~1.7 KiB).
///
/// NOTE(review): the fields below sum to 128 + 32 + 256 + 256 + 64 + 512
/// + 256 = 1504 bytes, which is *smaller* than the ~1.7 KiB quoted for the
/// real struct, so this witness is not actually an upper bound. The
/// "largest variant" claim also conflicts with the ~1.9 KiB quoted for
/// H.264 in this file. Confirm the real size against the header.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoAv1PicParamsShape {
    seq_header_scalars: [c_int; 32],
    // Reference frame indices (REF_FRAMES = 8 in AV1 spec).
    ref_frame_map: [c_int; 8],
    // Tile cols/rows can be up to MAX_TILE_COLS=64 / MAX_TILE_ROWS=64.
    tile_col_start_sb: [c_int; 64],
    tile_row_start_sb: [c_int; 64],
    // Loop restoration unit shifts + film grain table.
    loop_filter: [c_int; 16],
    // Film grain: scaling_points_y[14][2] + cb[10][2] + cr[10][2] + ar coeffs.
    film_grain: [u8; 512],
    reserved_tail: [u8; 256],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoAv1PicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDVP9PICPARAMS — compact (~0.5 KiB) since VP9
/// reference handling is frame-buffer-only; no DPB entries per se.
///
/// The witness sums to 128 + 32 + 384 + 128 = 672 bytes. Compile-time
/// witness only: nothing constructs this type at runtime.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoVp9PicParamsShape {
    profile_and_scalars: [c_int; 32],
    ref_frame_map: [c_int; 8],
    // Compressed header context probabilities — entropy coder tables.
    probs: [u8; 384],
    reserved_tail: [u8; 128],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoVp9PicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDVP8PICPARAMS — smaller still than VP9.
///
/// The witness sums to 64 + 12 + 256 + 64 = 396 bytes. Compile-time
/// witness only: nothing constructs this type at runtime.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoVp8PicParamsShape {
    profile_and_scalars: [c_int; 16],
    // The three reference slots.
    last_ref: c_int,
    golden_ref: c_int,
    alt_ref: c_int,
    // VP8 quant tables / loop filter tables.
    tables: [u8; 256],
    reserved_tail: [u8; 64],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoVp8PicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDMPEG2PICPARAMS — tiny by modern standards.
///
/// The witness sums to 224 bytes: sixteen `c_int` values (64), two 64-byte
/// quant matrices (128) and a 32-byte tail. Compile-time witness only:
/// nothing constructs this type at runtime.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoMpeg2PicParamsShape {
    forward_ref_pic_idx: c_int,
    backward_ref_pic_idx: c_int,
    picture_coding_type: c_int,
    full_pel_forward_vector: c_int,
    full_pel_backward_vector: c_int,
    f_code: [[c_int; 2]; 2],
    intra_dc_precision: c_int,
    frame_pred_frame_dct: c_int,
    concealment_motion_vectors: c_int,
    q_scale_type: c_int,
    intra_vlc_format: c_int,
    alternate_scan: c_int,
    top_field_first: c_int,
    quant_matrix_intra: [c_uchar; 64],
    quant_matrix_inter: [c_uchar; 64],
    reserved_tail: [u8; 32],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoMpeg2PicParamsShape>() <= 4096);

/// Upper-bound shape of CUVIDMPEG4PICPARAMS — comparable to MPEG-2.
///
/// The witness sums to 208 bytes: twelve `c_int` scalars (48), two 64-byte
/// quant matrices (128) and a 32-byte tail. Compile-time witness only:
/// nothing constructs this type at runtime.
#[repr(C)]
#[allow(dead_code)]
struct CuVideoMpeg4PicParamsShape {
    forward_ref_pic_idx: c_int,
    backward_ref_pic_idx: c_int,
    vop_time_increment_resolution: c_int,
    vop_coding_type: c_int,
    interlaced: c_int,
    quant_type: c_int,
    quarter_sample: c_int,
    short_video_header: c_int,
    divx_flags: c_int,
    top_field_first: c_int,
    rounding_control: c_int,
    alternate_vertical_scan_flag: c_int,
    quant_matrix_intra: [c_uchar; 64],
    quant_matrix_inter: [c_uchar; 64],
    reserved_tail: [u8; 32],
}
// Must fit inside CuVideoPicParams::codec_specific (4096 bytes).
const _: () = assert!(std::mem::size_of::<CuVideoMpeg4PicParamsShape>() <= 4096);

/// Mirror of CUVIDPROCPARAMS — the post-processing parameters passed by
/// pointer as the last argument of `FnCuvidMapVideoFrame`.
///
/// Uses only fixed-width integers and pointers (no `c_ulong`), so the
/// 64-bit layout is the same on Windows and Linux: 264 bytes, asserted at
/// compile time in this file.
#[repr(C)]
pub struct CuVideoProcParams {
    pub progressive_frame: c_int,
    pub second_field: c_int,
    pub top_field_first: c_int,
    pub unpaired_field: c_int,
    pub reserved_flags: c_uint,
    pub reserved_zero: c_uint,
    pub raw_input_dptr: c_ulonglong,
    pub raw_input_pitch: c_uint,
    pub raw_input_format: c_uint,
    pub raw_output_dptr: c_ulonglong,
    pub raw_output_pitch: c_uint,
    pub reserved1: c_uint,
    // Untyped stream handle. NOTE(review): presumably a CUDA stream, with
    // null selecting the default stream — confirm against callers.
    pub output_stream: *mut c_void,
    pub reserved: [c_uint; 46],
    pub histogram_dptr: *mut c_void,
    pub reserved2: [*mut c_void; 1],
}

// TODO: when container compiles and tests can run, wire in
// `cuvidGetDecoderCaps` pre-flight in sequence_callback. The CUVIDDECODECAPS
// struct (SDK 12.2 cuviddec.h) reports `bIsSupported`, `nMaxWidth`,
// `nMaxHeight` for a given (codec, chroma_format, bit_depth_minus8) tuple.
// Running the query before cuvidCreateDecoder would convert "driver
// rejects silently" into an explicit "3090 NVDEC does not advertise
// HEVC 4:2:2 support" error in the WARN fallback log. Not wiring here
// yet because adding untested FFI struct layouts on top of unrunnable
// tests (container::demux currently broken by WIP task #12) would
// introduce drift I can't verify.
//
// NOTE: this TODO looks partly stale — a `CuVideoDecodeCaps` mirror and a
// `FnCuvidGetDecoderCaps` alias are declared further down in this file.
// Whether sequence_callback already runs the pre-flight cannot be told from
// this module; confirm, then trim or delete this note.

// ─── Compile-time struct-size assertions ──────────────────────────
//
// Task #39 NVDEC Windows segfault audit: CUVID FFI mirrors are verified
// for byte-exact layout against the REAL NVIDIA Video Codec SDK 12.2
// (dlopen'd nvcuvid.dll / libnvcuvid.so, NOT the vendored stub at
// `vendor/nvidia/*.h` which is a simplified reference). The most common
// cause of STATUS_ACCESS_VIOLATION in NVDEC pipelines is a Rust struct
// under-sized relative to the C ABI: the driver writes past our
// allocation into adjacent state, corruption surfaces later as a segfault
// or — worse — as silent wrong-frames. Compile-time asserts convert
// that class of bug from "intermittent crash on long streams" into a
// build-time error.
//
// Prior drift caught by this approach:
//   - CUVIDPARSERPARAMS 80→136 (task #39/#52/#53, fix: add reserved2 array)
//   - CUVIDPICPARAMS    codec_specific 2048→4096 bytes, struct total now 4280
//                       (task #65, fix: codec_specific [u8;2048]→[c_uint;1024])
//
// Squad-12 (2026-04-17 PM) added per-codec-variant shape witnesses
// (CuVideoH264PicParamsShape, CuVideoHevcPicParamsShape, …) so a future
// SDK that grows any one variant past the 4096-byte CodecReserved[1024]
// envelope fails compilation rather than silently overflowing.
// CUVIDH264DPBENTRY size locked at 28 bytes (dpb[16] = 448 bytes).
//
// Expected sizes are computed against ffmpeg's nv-codec-headers 12.2
// (FFmpeg/nv-codec-headers/include/ffnvcodec/{dynlink_nvcuvid,
// dynlink_cuviddec}.h) on Windows MSVC x64 (c_ulong=4, pointer=8).
// Linux x86_64 differs in c_ulong=8 width; the asserts below are
// platform-conditional where that matters.
//
// If any of these assertions fire: the Rust struct no longer matches
// the driver ABI — expect silent zero-frames or STATUS_ACCESS_VIOLATION
// depending on stream length and corruption target. Fix by comparing
// field-by-field against the linked headers and updating reserved counts.

// CUVIDPARSERPARAMS: 5×u32 + 5×u32 + ptr + 5×fn_ptr + 5×ptr + ptr = 136
// (40 bytes of u32 + 12 pointer-sized slots; assumes 8-byte pointers, so
// this assertion is not expected to hold on a 32-bit target).
const _: () = assert!(std::mem::size_of::<CuVideoParserParams>() == 136);

// CUVIDEOFORMAT: 64–68 bytes of named fields (video_signal_description is
// 4 bytes in the real SDK, 7 bytes in vendored/older layouts) + our
// 1024-byte _reserved_tail. Driver only writes the front-of-struct
// fields; tail is defensive padding so any driver-version drift in the
// trailing layout cannot clobber adjacent heap state.
// We don't assert an exact size since the tail length is a Rust choice
// — just that it's comfortably above the SDK's worst-case 72 bytes.
// NOTE(review): a lower bound on the total size says nothing about field
// offsets; it does not catch a named field sitting at the wrong position.
const _: () = assert!(std::mem::size_of::<CuVideoFormat>() >= 72);

// CUVIDPICPARAMS — Windows MSVC x64 layout (task #39 audit):
//   6×c_int                    = 24
//   n_bitstream_data_len u32   = 4   (cumulative 28)
//   [align 8]                  = +4  (32)
//   p_bitstream_data *const    = 8   (40)
//   n_num_slices u32           = 4   (44)
//   [align 8]                  = +4  (48)
//   p_slice_data_offsets       = 8   (56)
//   2×c_int                    = 8   (64)
//   30×c_uint reserved         = 120 (184)
//   1024×c_uint codec_specific = 4096 (4280)
// Total: 4280 bytes. No c_ulong is involved, so the same arithmetic holds
// on Linux x86_64.
//
// The real SDK union variants (CUVIDH264PICPARAMS ~1.9 KiB with DPB+
// scaling lists, CUVIDHEVCPICPARAMS ~1.2 KiB, CUVIDAV1PICPARAMS ~1.7
// KiB, CUVIDVP9PICPARAMS ~0.5 KiB) all fit inside the 4096-byte
// CodecReserved[1024] fallback. The per-variant shape witnesses carry
// their own `<= 4096` assertions next to their definitions.
const _: () = assert!(std::mem::size_of::<CuVideoPicParams>() == 4280);
// The codec_specific region must be exactly the 4096-byte SDK envelope.
// NOTE(review): this line measures a free-standing `[c_uint; 1024]` type,
// not the `codec_specific` field, so it cannot fail when someone edits the
// field's declaration. It only pins the assumption that c_uint is 4 bytes.
// The whole-struct assertion above is what actually catches a change to the
// field's type or length.
const _: () = assert!(std::mem::size_of::<[c_uint; 1024]>() == 4096);

// CUVIDPARSERDISPINFO: 4×i32 + u64 = 24. Matches SDK.
const _: () = assert!(std::mem::size_of::<CuVideoDispInfo>() == 24);

// CUVIDSOURCEDATAPACKET: Windows MSVC x64 has c_ulong=4 →
//   flags (4) + payload_size (4) + [pad 0] + payload* (8) + timestamp u64 (8) = 24
// Linux x86_64 has c_ulong=8 →
//   flags (8) + payload_size (8) + payload* (8) + timestamp u64 (8) = 32
// Assert per-platform — a mismatch means the driver reads payload from
// the wrong offset and either segfaults or decodes random memory.
#[cfg(target_os = "windows")]
const _: () = assert!(std::mem::size_of::<CuVideoSourceDataPacket>() == 24);
#[cfg(all(target_os = "linux", target_pointer_width = "64"))]
const _: () = assert!(std::mem::size_of::<CuVideoSourceDataPacket>() == 32);

// CUVIDDECODECREATEINFO — Windows MSVC x64:
//   3×c_ulong (12) + 2×c_int (8) + 6×c_ulong (24) = 44
//   + 4×i16 display_area (8) = 52
//   + 2×c_int format/deinterlace (8) = 60
//   + 3×c_ulong target (12) = 72
//   + vid_lock ptr (8) = 80
//   + 4×i16 target_rect (8) = 88
//   + enable_histogram c_ulong (4) = 92
//   + 4×c_ulong reserved2 (16) = 108
//   + trailing 4 bytes align to 8-byte pointer alignment = 112
// NOTE(review): there is no Linux counterpart to this assertion, so the
// Linux layout of this struct is unchecked. By the same arithmetic with
// c_ulong=8 the x86_64 Linux size should come out at 176 bytes — TODO
// confirm and add a cfg'd assert like the CUVIDSOURCEDATAPACKET pair above.
#[cfg(target_os = "windows")]
const _: () = assert!(std::mem::size_of::<CuVideoDecodeCreateInfo>() == 112);

// CUVIDPROCPARAMS — 4×i32 + 2×u32 + u64 + 2×u32 + u64 + 2×u32 + ptr
// + 46×u32 + ptr + ptr, with pointer alignment pads = 264.
const _: () = assert!(std::mem::size_of::<CuVideoProcParams>() == 264);

// Function-pointer signatures for the CUVID entry points. Each alias is
// named after the symbol it is expected to be bound to
// (`FnCuvidCreateVideoParser` → `cuvidCreateVideoParser`, and so on); the
// symbol lookup itself is not part of this file. All of them return a
// `CUresult` status code.

/// Writes a new parser handle through the first argument, configured from
/// the given [`CuVideoParserParams`].
pub type FnCuvidCreateVideoParser =
    unsafe extern "C" fn(*mut CUvideoparser, *mut CuVideoParserParams) -> CUresult;
/// Feeds one [`CuVideoSourceDataPacket`] to an existing parser.
pub type FnCuvidParseVideoData =
    unsafe extern "C" fn(CUvideoparser, *mut CuVideoSourceDataPacket) -> CUresult;
/// Takes the parser handle by value.
pub type FnCuvidDestroyVideoParser = unsafe extern "C" fn(CUvideoparser) -> CUresult;
/// Writes a new decoder handle through the first argument, configured from
/// the given [`CuVideoDecodeCreateInfo`].
pub type FnCuvidCreateDecoder =
    unsafe extern "C" fn(*mut CUvideodecoder, *mut CuVideoDecodeCreateInfo) -> CUresult;
/// Takes the decoder handle by value.
pub type FnCuvidDestroyDecoder = unsafe extern "C" fn(CUvideodecoder) -> CUresult;
/// Submits one [`CuVideoPicParams`] to the decoder.
pub type FnCuvidDecodePicture =
    unsafe extern "C" fn(CUvideodecoder, *mut CuVideoPicParams) -> CUresult;
/// Arguments in order: decoder, a `c_int` (presumably the picture index from
/// `CuVideoDispInfo::picture_index` — verify against callers), out-pointer
/// for the mapped device address, out-pointer for a `c_uint` (presumably the
/// row pitch), and the [`CuVideoProcParams`].
pub type FnCuvidMapVideoFrame = unsafe extern "C" fn(
    CUvideodecoder,
    c_int,
    *mut CUdeviceptr,
    *mut c_uint,
    *mut CuVideoProcParams,
) -> CUresult;
/// Takes the decoder and a device address (presumably the one written by
/// the map call — verify against callers).
pub type FnCuvidUnmapVideoFrame = unsafe extern "C" fn(CUvideodecoder, CUdeviceptr) -> CUresult;
/// In/out query: the caller fills the IN half of [`CuVideoDecodeCaps`] and
/// the driver fills the OUT half.
pub type FnCuvidGetDecoderCaps = unsafe extern "C" fn(*mut CuVideoDecodeCaps) -> CUresult;

/// Mirror of `CUVIDDECODECAPS` (cuviddec.h, SDK 12.2), the in/out argument of
/// `cuvidGetDecoderCaps`.
///
/// The caller fills the IN fields (codec / chroma / bit-depth) and the driver
/// fills the OUT fields — whether the GPU's NVDEC supports that combination
/// and its min/max dimensions. Run before `cuvidCreateDecoder` so an
/// unsupported tuple is a clean typed error.
///
/// # Layout
///
/// The SDK struct is **88 bytes**:
///
/// | bytes  | SDK fields                                                          |
/// |--------|---------------------------------------------------------------------|
/// | 0..24  | `eCodecType`, `eChromaFormat`, `nBitDepthMinus8`, `reserved1[3]`     |
/// | 24..28 | `bIsSupported`, `nNumNVDECs`, `nOutputFormatMask`                    |
/// | 28..40 | `nMaxWidth`, `nMaxHeight`, `nMaxMBCount`                             |
/// | 40..44 | `nMinWidth`, `nMinHeight`                                            |
/// | 44..48 | `bIsHistogramSupported`, `nCounterBitDepth`, `nMaxHistogramBins`     |
/// | 48..88 | `reserved3[10]`                                                      |
///
/// Earlier revisions declared `reserved3` as eight words and asserted a total
/// of 80 bytes, leaving the buffer handed to the driver 8 bytes short of the
/// struct the driver operates on — the same under-sized-mirror class of bug
/// as the CUVIDPARSERPARAMS and CUVIDPICPARAMS fixes in this file. Only the
/// reserved tail grew; every named field keeps its offset and type.
///
/// Construct with `std::mem::zeroed()` (every field is a plain integer, and
/// the SDK requires reserved fields to be zero), then set the IN fields.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct CuVideoDecodeCaps {
    // IN
    pub codec_type: c_int,
    pub chroma_format: c_int,
    pub bit_depth_minus8: u32,
    pub reserved1: [u32; 3],
    // OUT
    pub is_supported: u8,
    pub num_nvdecs: u8,
    pub output_format_mask: u16,
    pub max_width: u32,
    pub max_height: u32,
    pub max_mb_count: u32,
    pub min_width: u16,
    pub min_height: u16,
    // NOTE(review): in the SDK 12.2 header the byte at this offset (44) is
    // `bIsHistogramSupported`, not an output-surface count. The name is kept
    // so existing readers still compile; do not interpret it as a surface
    // count without checking the header.
    pub num_output_surfaces: u8,
    // Covers the SDK's `nCounterBitDepth` (u8) + `nMaxHistogramBins` (u16).
    pub reserved2: [u8; 3],
    // SDK: `unsigned int reserved3[10]`.
    pub reserved3: [u32; 10],
}
// 24 (IN) + 24 (OUT) + 40 (reserved tail) = 88, matching the SDK struct.
const _: () = assert!(std::mem::size_of::<CuVideoDecodeCaps>() == 88);

// ─── Codec constants ───────────────────────────────────────────────
//
// Values of the SDK's `cudaVideoCodec` enum (cuviddec.h), which is numbered
// sequentially from zero:
//   MPEG1=0, MPEG2=1, MPEG4=2, VC1=3, H264=4, JPEG=5, H264_SVC=6,
//   H264_MVC=7, HEVC=8, VP8=9, VP9=10, AV1=11.
// Only the codecs this module dispatches are given names. Listed in enum
// order so gaps and duplicates are easy to spot.
pub const CUVID_MPEG2: c_int = 1;
/// `cudaVideoCodec_MPEG4`. Was previously 3, which is `cudaVideoCodec_VC1`
/// — MPEG-4 Part 2 streams were being announced to the driver as VC-1.
pub const CUVID_MPEG4: c_int = 2;
pub const CUVID_H264: c_int = 4;
pub const CUVID_HEVC: c_int = 8;
pub const CUVID_VP8: c_int = 9;
pub const CUVID_VP9: c_int = 10;
pub const CUVID_AV1: c_int = 11;

// Packet flag bits. They share the `c_ulong` type of
// `CuVideoSourceDataPacket::flags`, so they are presumably OR-ed into that
// field — verify against callers.
/// Marks the final packet of a stream (name-derived; confirm against the
/// SDK's `CUvideopacketflags`).
pub const CUVID_PKT_ENDOFSTREAM: c_ulong = 1;
/// Tells the parser to associate the packet with its timestamp. Without
/// this flag the parser consumes data silently and may never emit
/// picture-complete callbacks. ffmpeg sets this on every data packet.
pub const CUVID_PKT_TIMESTAMP: c_ulong = 2;

// cudaVideoSurfaceFormat (cuviddec.h):
//   NV12 = 0    — 8-bit per sample, semi-planar (Y plane + interleaved UV)
//   P016 = 1    — 16-bit per sample, semi-planar; 10-bit data in the
//                 high 10 bits of each u16, low 6 bits zero-padded
//   YUV444 = 2  — 8-bit 4:4:4
//   YUV444_16 = 3 — 16-bit 4:4:4
// We only use NV12 (8-bit 4:2:0) and P016 (10/12-bit 4:2:0).
pub const CUVID_FMT_NV12: c_int = 0;
pub const CUVID_FMT_P016: c_int = 1;
// 4:2:0 chroma subsampling tag; same `c_int` type as the `chroma_format`
// fields of the structs in this file.
pub const CUVID_CHROMA_420: c_int = 1;
/// `cudaVideoCreate_PreferCUVID` — ask the driver to use the dedicated
/// video engines directly. This is a hardware path, not a software decoder.
///
/// On Windows the SDK default may select DXVA, which produces different
/// surface layouts and is the suspected root cause of the H.264 segfault
/// seen on GPU boxes in testing. ffmpeg's cuviddec.c sets this
/// unconditionally.
///
/// The SDK's `cudaVideoCreateFlags` enum (cuviddec.h) is:
///   Default = 0x00, PreferCUDA = 0x01, PreferDXVA = 0x02, PreferCUVID = 0x04.
/// This constant was previously 0x01, i.e. it actually requested
/// `PreferCUDA` (the CUDA-based decoder, which the header documents as
/// requiring a valid `vidLock`) rather than the backend its name and
/// documentation promise.
pub const CUVID_CREATE_PREFER_CUVID: c_ulong = 0x04;

/// Structural mirror of `CUVIDOPERATINGPOINTINFO` (nvcuvid.h). Not
/// read at runtime — per the original note, the operating-point callback
/// returns a fixed value without inspecting the struct. That callback is
/// not defined in this module; it is presumably the function installed in
/// `CuVideoParserParams::pfn_get_operating_point`, whose second parameter
/// is declared as an untyped `*mut c_void`. The shape is documented here
/// so a future session implementing layer-selective decode has a
/// reference. Tagged with `#[allow(dead_code)]` to silence the
/// unused-field warnings.
///
/// The fields are private, so code outside this module cannot read or
/// construct the struct field-by-field.
#[repr(C)]
#[allow(dead_code)]
pub struct CuVideoOperatingPointInfo {
    codec: c_int,
    // Union: AV1 fields vs CodecReserved[1024].
    // AV1 variant:
    //   unsigned char  operating_points_cnt;
    //   unsigned char  reserved24_bits[3];
    //   unsigned short operating_points_idc[32];
    //   → 4 + 64 = 68 bytes
    // CodecReserved[1024] is the upper bound; assert below.
    reserved: [u8; 1024],
}
// As declared the struct is 4 + 1024 = 1028 bytes; the bound below leaves
// 4 bytes of slack rather than pinning the exact size.
const _: () = assert!(std::mem::size_of::<CuVideoOperatingPointInfo>() <= 1024 + 8);