Skip to main content

codec/encode/tuning/
adapters.rs

1//! Per-encoder adapter functions.
2//!
3//! Each public function translates a `(QualityTarget, SpeedTier, width, height)`
4//! tuple into the concrete parameter struct for a specific encoder backend.
5//! Backend-private helpers (anchors, q-index mappers) live beside the
6//! function that uses them.
7
8use super::{
9    libaom_cq_for_target, nvenc_cq_for_target, piecewise_quality, tile_grid_hw, tile_grid_nvenc,
10    tile_grid_rav1e, NV_ENC_PRESET_P5_GUID_BYTES, NV_ENC_PRESET_P6_GUID_BYTES,
11    NV_ENC_PRESET_P7_GUID_BYTES, NVENC_TUNING_HIGH_QUALITY,
12};
13use super::params::{
14    AmfAv1Params, AmfQualityPreset, AmfRateControl, MFX_CODINGOPTION_ON, NvencAv1Params,
15    NvencRateControl, QsvAv1Params, QsvRateControl, Rav1eParams,
16};
17use super::{QualityTarget, SpeedTier};
18
19// ─── rav1e ───────────────────────────────────────────────────────
20
21/// Derive rav1e params for a given quality target + speed tier +
22/// resolution.
23pub fn rav1e_params(
24    target: QualityTarget,
25    tier: SpeedTier,
26    width: u32,
27    height: u32,
28) -> Rav1eParams {
29    // rav1e quantizer ≈ 4 × libaom cq-level (well-known rule of thumb;
30    // see docs/av1-tuning-research.md §2.3).
31    let libaom_cq = libaom_cq_for_target(target);
32    let quantizer = (libaom_cq as usize) * 4;
33
34    let speed_preset = match tier {
35        SpeedTier::Archive => 4,
36        SpeedTier::Standard => 6,
37        SpeedTier::Draft => 8,
38    };
39
40    // rav1e has high per-tile overhead and benefits from parallelism;
41    // use the generous tile grid at 4K (4x4 = 16 tiles).
42    let (tile_cols, tile_rows) = tile_grid_rav1e(width, height);
43
44    Rav1eParams {
45        quantizer,
46        speed_preset,
47        tile_rows,
48        tile_cols,
49    }
50}
51
52// ─── NVENC ───────────────────────────────────────────────────────
53
54/// Derive NVENC AV1 params for a given quality target + speed tier +
55/// resolution.
56pub fn nvenc_av1_params(
57    target: QualityTarget,
58    tier: SpeedTier,
59    width: u32,
60    height: u32,
61) -> NvencAv1Params {
62    // Calibrated CQ values: NVENC AV1 needs ~3-4 lower CQ to hit the
63    // same VMAF as libaom, compensating for its lower compression
64    // efficiency. See research §2.4.
65    let cq = nvenc_cq_for_target(target);
66
67    let (preset_guid, lookahead_depth, aq_strength) = match tier {
68        SpeedTier::Archive => (NV_ENC_PRESET_P7_GUID_BYTES, 32, 10),
69        SpeedTier::Standard => (NV_ENC_PRESET_P6_GUID_BYTES, 16, 8),
70        SpeedTier::Draft => (NV_ENC_PRESET_P5_GUID_BYTES, 0, 6),
71    };
72
73    // Archive tier uses CONSTQP for reproducible bitstreams; every
74    // other tier uses VBR with targetQuality so bitrate floats by
75    // content complexity.
76    let rc_mode = match target {
77        QualityTarget::VisuallyLossless => NvencRateControl::ConstQp,
78        _ => NvencRateControl::VbrTargetQuality,
79    };
80
81    // NVENC AV1 HQ tuning: fewer tiles = better compression because
82    // tile boundaries break loop-filter continuity and AV1 tiles are
83    // independently entropy-coded. Published measurements show ~0.6%
84    // VMAF loss at 2 tiles, ~1.3% at 4+ tiles on libaom; NVENC HQ
85    // exhibits the same scaling. NVENC has enough internal parallelism
86    // that it doesn't need 16-tile grids for throughput the way rav1e
87    // does — cap at 2x2 even at 4K.
88    //   Reference: research §3 and
89    //   https://streaminglearningcenter.com/codecs/av1-encoding-and-4k.html
90    let (num_tile_columns, num_tile_rows) = tile_grid_nvenc(width, height);
91
92    NvencAv1Params {
93        rc_mode,
94        cq,
95        preset_guid,
96        tuning_info: NVENC_TUNING_HIGH_QUALITY,
97        aq_strength,
98        lookahead_depth,
99        num_tile_columns: num_tile_columns as u32,
100        num_tile_rows: num_tile_rows as u32,
101        output_annex_b_format: 0, // LOB for MP4
102        repeat_seq_hdr: 1,
103    }
104}
105
106// ─── AMF ─────────────────────────────────────────────────────────
107
108/// Derive AMD AMF AV1 params for a given quality target + speed tier +
109/// resolution.
110///
111/// AMF's AV1 q-index scale is 0..255 (the full AV1 quantizer range, not
112/// the NVENC-style 0..63 CQ band). Start point is rav1e's `4 × libaom_cq`
113/// rule, then apply an 8-point calibration shift down to compensate for
114/// VCN's documented compression-efficiency gap vs libaom (same goughlui
115/// study that calibrated NVENC's 3-4-point CQ shift tested AMF VCN and
116/// reported an analogous ~2-point CQ-equivalent shift; 2 points × 4 ≈ 8
117/// in the 0..255 space).
118///
119/// TODO(calibrate): replace these seed anchors with calibrated values
120/// once av1-tuning-eng runs the offline VMAF pass on RDNA3 hardware.
121/// See `docs/av1-tuning-research.md` §2.5 for the calibration protocol.
122pub fn amf_av1_params(
123    target: QualityTarget,
124    tier: SpeedTier,
125    width: u32,
126    height: u32,
127) -> AmfAv1Params {
128    let q_index_intra = amf_q_index_for_target(target);
129    // Inter-frames get a slightly higher QP so P/B frames spend fewer
130    // bits — biases bit allocation toward keyframes, which matches how
131    // rav1e and NVENC CONSTQP mode behave.
132    let q_index_inter = q_index_intra.saturating_add(8);
133
134    // QVBR quality 1..100; higher = better. Map our VMAF-band targets
135    // to the AMF-native band: VL=95, High=85, Standard=70, Low=55.
136    let qvbr_quality = match target {
137        QualityTarget::VisuallyLossless => 95,
138        QualityTarget::High => 85,
139        QualityTarget::Standard => 70,
140        QualityTarget::Low => 55,
141        QualityTarget::Vmaf(v) => vmaf_to_qvbr_quality(v),
142    };
143
144    // AMF quality preset per SpeedTier. Archive → HighQuality (best
145    // but slowest), Standard → Quality, Draft → Balanced. `Speed`
146    // preset deliberately unused — same rule as NVENC's P1-P4
147    // exclusion (see research §2.4: no low-latency tunings for batch
148    // transcode).
149    let quality_preset = match tier {
150        SpeedTier::Archive => AmfQualityPreset::HighQuality,
151        SpeedTier::Standard => AmfQualityPreset::Quality,
152        SpeedTier::Draft => AmfQualityPreset::Balanced,
153    };
154
155    // CQP for archival-lossless runs (reproducible bitstream); QVBR
156    // for everything else — matches the NVENC branch structure.
157    let rc_mode = match target {
158        QualityTarget::VisuallyLossless => AmfRateControl::Cqp,
159        _ => AmfRateControl::QualityVbr,
160    };
161
162    // AMF VCN tile parallelism is similar to NVENC — fewer tiles =
163    // better compression. Share the NVENC 2×2 cap via `tile_grid_hw`
164    // (both are "HQ-equivalent HW encoders that don't need aggressive
165    // tiling for throughput"). Total tiles = cols × rows; at 1×1 that's
166    // one, at 2×2 that's 4.
167    let (tile_cols, tile_rows) = tile_grid_hw(width, height);
168    let tiles_per_frame = (tile_cols * tile_rows) as u32;
169
170    AmfAv1Params {
171        rc_mode,
172        q_index_intra,
173        q_index_inter,
174        qvbr_quality,
175        quality_preset,
176        gop_size: 0, // caller fills from keyframe_interval
177        aq_mode: 1,  // CAQ — content-adaptive QP on
178        tiles_per_frame,
179    }
180}
181
182/// AMF CQP q-index (0..255) for a given QualityTarget. Starts from
183/// `libaom_cq × 4` and subtracts an 8-point calibration shift to
184/// compensate for VCN's compression-efficiency gap — analogous to
185/// NVENC's 3-4-point CQ shift in 0..63 space.
186///
187/// TODO(calibrate): replace with anchors from the offline VMAF pass
188/// on RDNA3 hardware. Seed values come from av1-tuning-eng's research
189/// doc §2.5 and GPUOpen AMF tuning guide.
190fn amf_q_index_for_target(target: QualityTarget) -> u8 {
191    let base = match target {
192        QualityTarget::VisuallyLossless => 72, // libaom 20 × 4 - 8
193        QualityTarget::High => 100,            // libaom 27 × 4 - 8
194        QualityTarget::Standard => 120,        // libaom 32 × 4 - 8
195        QualityTarget::Low => 144,             // libaom 38 × 4 - 8
196        QualityTarget::Vmaf(v) => vmaf_to_amf_q_index(v),
197    };
198    base.min(255) as u8
199}
200
201/// Anchors for AMF q-index interpolation when a caller passes an
202/// explicit Vmaf target. Descending VMAF → ascending q-index.
203const AMF_Q_INDEX_ANCHORS: &[(i32, i32)] = &[
204    (100, 50), // asymptote below VisuallyLossless
205    (98, 72),
206    (95, 100),
207    (90, 120),
208    (85, 144),
209    (70, 200),
210];
211
212fn vmaf_to_amf_q_index(vmaf: u8) -> u16 {
213    piecewise_quality(vmaf, AMF_Q_INDEX_ANCHORS, 0, 255) as u16
214}
215
216/// AMF anchors: AMF's QVBR quality scale is 1..100 (higher = better).
217/// Calibrated from research §2.5 against libaom at matched VMAF.
218const AMF_QVBR_ANCHORS: &[(i32, i32)] =
219    &[(100, 100), (98, 95), (95, 85), (90, 70), (85, 55), (70, 35)];
220
221fn vmaf_to_qvbr_quality(vmaf: u8) -> u8 {
222    piecewise_quality(vmaf, AMF_QVBR_ANCHORS, 1, 100)
223}
224
225// ─── QSV ─────────────────────────────────────────────────────────
226
227/// Derive Intel QSV AV1 params for a given quality target + speed tier +
228/// resolution.
229///
230/// oneVPL exposes two sensible modes for quality-driven encoding: ICQ
231/// (intelligent constant quality, 1..51 for AV1 — 1=best) and CQP
232/// (constant q-index, 0..255). ICQ is the default; CQP is the archival
233/// path. ICQ quality maps near-linearly to libaom cq-level at the range
234/// we care about (research §2.6, calibrated from Intel's public
235/// oneVPL sample_encode benchmarks).
236pub fn qsv_av1_params(
237    target: QualityTarget,
238    tier: SpeedTier,
239    width: u32,
240    height: u32,
241) -> QsvAv1Params {
242    // ICQ quality 1..51; 1=best. QSV maps AV1's native 0..63 CQ range
243    // into the 0..51 scale for API parity with H.264/HEVC (oneVPL
244    // idiosyncrasy), so we scale libaom cq-level by 51/63 ≈ 0.81.
245    //   VL: libaom 20 × 51/63 ≈ 16
246    //   Hi: libaom 27 × 51/63 ≈ 22
247    //   Std: libaom 32 × 51/63 ≈ 26
248    //   Low: libaom 38 × 51/63 ≈ 31
249    let icq_quality = match target {
250        QualityTarget::VisuallyLossless => 16,
251        QualityTarget::High => 22,
252        QualityTarget::Standard => 26,
253        QualityTarget::Low => 31,
254        QualityTarget::Vmaf(v) => vmaf_to_qsv_icq(v),
255    };
256    // CQP q-index for archival — QSV uses the full AV1 0..255 range
257    // via `mfx.QPI`. Same 4× libaom mapping as rav1e/AMF.
258    let libaom_cq = libaom_cq_for_target(target);
259    let qp_i = (libaom_cq as u16 * 4).min(255);
260    let qp_p = qp_i.saturating_add(8).min(255);
261
262    // oneVPL TargetUsage: 1=best quality, 7=best speed. Per
263    // av1-tuning-eng review: Archive=1, Standard=4, Draft=6
264    // (not 7 — 6 still leaves headroom for the driver's
265    // "adaptive speed" selections without falling into the explicit
266    // "worst-quality" bucket).
267    let target_usage = match tier {
268        SpeedTier::Archive => 1,
269        SpeedTier::Standard => 4,
270        SpeedTier::Draft => 6,
271    };
272
273    let rc_mode = match target {
274        QualityTarget::VisuallyLossless => QsvRateControl::Cqp,
275        _ => QsvRateControl::Icq,
276    };
277
278    let (num_tile_columns, num_tile_rows) = tile_grid_hw(width, height);
279
280    QsvAv1Params {
281        rc_mode,
282        icq_quality,
283        qp_i,
284        qp_p,
285        target_usage,
286        gop_pic_size: 0, // caller fills from keyframe_interval
287        num_tile_columns: num_tile_columns as u8,
288        num_tile_rows: num_tile_rows as u8,
289        // AV1 QSV encode is VDENC (low-power) only on Arc / Meteor Lake+.
290        low_power: MFX_CODINGOPTION_ON,
291    }
292}
293
294/// QSV ICQ scale is 1..51 (lower = better), inverted from AMF's QVBR.
295/// Anchor table reflects Intel's public oneVPL sample benchmarks.
296const QSV_ICQ_ANCHORS: &[(i32, i32)] =
297    &[(100, 8), (98, 18), (95, 24), (90, 30), (85, 36), (70, 48)];
298
299fn vmaf_to_qsv_icq(vmaf: u8) -> u16 {
300    piecewise_quality(vmaf, QSV_ICQ_ANCHORS, 1, 51) as u16
301}