codec/encode/tuning/adapters.rs
1//! Per-encoder adapter functions.
2//!
3//! Each public function translates a `(QualityTarget, SpeedTier, width, height)`
4//! tuple into the concrete parameter struct for a specific encoder backend.
5//! Backend-private helpers (anchors, q-index mappers) live beside the
6//! function that uses them.
7
8use super::{
9 libaom_cq_for_target, nvenc_cq_for_target, piecewise_quality, tile_grid_hw, tile_grid_nvenc,
10 tile_grid_rav1e, NV_ENC_PRESET_P5_GUID_BYTES, NV_ENC_PRESET_P6_GUID_BYTES,
11 NV_ENC_PRESET_P7_GUID_BYTES, NVENC_TUNING_HIGH_QUALITY,
12};
13use super::params::{
14 AmfAv1Params, AmfQualityPreset, AmfRateControl, MFX_CODINGOPTION_ON, NvencAv1Params,
15 NvencRateControl, QsvAv1Params, QsvRateControl, Rav1eParams,
16};
17use super::{QualityTarget, SpeedTier};
18
19// ─── rav1e ───────────────────────────────────────────────────────
20
21/// Derive rav1e params for a given quality target + speed tier +
22/// resolution.
23pub fn rav1e_params(
24 target: QualityTarget,
25 tier: SpeedTier,
26 width: u32,
27 height: u32,
28) -> Rav1eParams {
29 // rav1e quantizer ≈ 4 × libaom cq-level (well-known rule of thumb;
30 // see docs/av1-tuning-research.md §2.3).
31 let libaom_cq = libaom_cq_for_target(target);
32 let quantizer = (libaom_cq as usize) * 4;
33
34 let speed_preset = match tier {
35 SpeedTier::Archive => 4,
36 SpeedTier::Standard => 6,
37 SpeedTier::Draft => 8,
38 };
39
40 // rav1e has high per-tile overhead and benefits from parallelism;
41 // use the generous tile grid at 4K (4x4 = 16 tiles).
42 let (tile_cols, tile_rows) = tile_grid_rav1e(width, height);
43
44 Rav1eParams {
45 quantizer,
46 speed_preset,
47 tile_rows,
48 tile_cols,
49 }
50}
51
52// ─── NVENC ───────────────────────────────────────────────────────
53
54/// Derive NVENC AV1 params for a given quality target + speed tier +
55/// resolution.
56pub fn nvenc_av1_params(
57 target: QualityTarget,
58 tier: SpeedTier,
59 width: u32,
60 height: u32,
61) -> NvencAv1Params {
62 // Calibrated CQ values: NVENC AV1 needs ~3-4 lower CQ to hit the
63 // same VMAF as libaom, compensating for its lower compression
64 // efficiency. See research §2.4.
65 let cq = nvenc_cq_for_target(target);
66
67 let (preset_guid, lookahead_depth, aq_strength) = match tier {
68 SpeedTier::Archive => (NV_ENC_PRESET_P7_GUID_BYTES, 32, 10),
69 SpeedTier::Standard => (NV_ENC_PRESET_P6_GUID_BYTES, 16, 8),
70 SpeedTier::Draft => (NV_ENC_PRESET_P5_GUID_BYTES, 0, 6),
71 };
72
73 // Archive tier uses CONSTQP for reproducible bitstreams; every
74 // other tier uses VBR with targetQuality so bitrate floats by
75 // content complexity.
76 let rc_mode = match target {
77 QualityTarget::VisuallyLossless => NvencRateControl::ConstQp,
78 _ => NvencRateControl::VbrTargetQuality,
79 };
80
81 // NVENC AV1 HQ tuning: fewer tiles = better compression because
82 // tile boundaries break loop-filter continuity and AV1 tiles are
83 // independently entropy-coded. Published measurements show ~0.6%
84 // VMAF loss at 2 tiles, ~1.3% at 4+ tiles on libaom; NVENC HQ
85 // exhibits the same scaling. NVENC has enough internal parallelism
86 // that it doesn't need 16-tile grids for throughput the way rav1e
87 // does — cap at 2x2 even at 4K.
88 // Reference: research §3 and
89 // https://streaminglearningcenter.com/codecs/av1-encoding-and-4k.html
90 let (num_tile_columns, num_tile_rows) = tile_grid_nvenc(width, height);
91
92 NvencAv1Params {
93 rc_mode,
94 cq,
95 preset_guid,
96 tuning_info: NVENC_TUNING_HIGH_QUALITY,
97 aq_strength,
98 lookahead_depth,
99 num_tile_columns: num_tile_columns as u32,
100 num_tile_rows: num_tile_rows as u32,
101 output_annex_b_format: 0, // LOB for MP4
102 repeat_seq_hdr: 1,
103 }
104}
105
106// ─── AMF ─────────────────────────────────────────────────────────
107
108/// Derive AMD AMF AV1 params for a given quality target + speed tier +
109/// resolution.
110///
111/// AMF's AV1 q-index scale is 0..255 (the full AV1 quantizer range, not
112/// the NVENC-style 0..63 CQ band). Start point is rav1e's `4 × libaom_cq`
113/// rule, then apply an 8-point calibration shift down to compensate for
114/// VCN's documented compression-efficiency gap vs libaom (same goughlui
115/// study that calibrated NVENC's 3-4-point CQ shift tested AMF VCN and
116/// reported an analogous ~2-point CQ-equivalent shift; 2 points × 4 ≈ 8
117/// in the 0..255 space).
118///
119/// TODO(calibrate): replace these seed anchors with calibrated values
120/// once av1-tuning-eng runs the offline VMAF pass on RDNA3 hardware.
121/// See `docs/av1-tuning-research.md` §2.5 for the calibration protocol.
122pub fn amf_av1_params(
123 target: QualityTarget,
124 tier: SpeedTier,
125 width: u32,
126 height: u32,
127) -> AmfAv1Params {
128 let q_index_intra = amf_q_index_for_target(target);
129 // Inter-frames get a slightly higher QP so P/B frames spend fewer
130 // bits — biases bit allocation toward keyframes, which matches how
131 // rav1e and NVENC CONSTQP mode behave.
132 let q_index_inter = q_index_intra.saturating_add(8);
133
134 // QVBR quality 1..100; higher = better. Map our VMAF-band targets
135 // to the AMF-native band: VL=95, High=85, Standard=70, Low=55.
136 let qvbr_quality = match target {
137 QualityTarget::VisuallyLossless => 95,
138 QualityTarget::High => 85,
139 QualityTarget::Standard => 70,
140 QualityTarget::Low => 55,
141 QualityTarget::Vmaf(v) => vmaf_to_qvbr_quality(v),
142 };
143
144 // AMF quality preset per SpeedTier. Archive → HighQuality (best
145 // but slowest), Standard → Quality, Draft → Balanced. `Speed`
146 // preset deliberately unused — same rule as NVENC's P1-P4
147 // exclusion (see research §2.4: no low-latency tunings for batch
148 // transcode).
149 let quality_preset = match tier {
150 SpeedTier::Archive => AmfQualityPreset::HighQuality,
151 SpeedTier::Standard => AmfQualityPreset::Quality,
152 SpeedTier::Draft => AmfQualityPreset::Balanced,
153 };
154
155 // CQP for archival-lossless runs (reproducible bitstream); QVBR
156 // for everything else — matches the NVENC branch structure.
157 let rc_mode = match target {
158 QualityTarget::VisuallyLossless => AmfRateControl::Cqp,
159 _ => AmfRateControl::QualityVbr,
160 };
161
162 // AMF VCN tile parallelism is similar to NVENC — fewer tiles =
163 // better compression. Share the NVENC 2×2 cap via `tile_grid_hw`
164 // (both are "HQ-equivalent HW encoders that don't need aggressive
165 // tiling for throughput"). Total tiles = cols × rows; at 1×1 that's
166 // one, at 2×2 that's 4.
167 let (tile_cols, tile_rows) = tile_grid_hw(width, height);
168 let tiles_per_frame = (tile_cols * tile_rows) as u32;
169
170 AmfAv1Params {
171 rc_mode,
172 q_index_intra,
173 q_index_inter,
174 qvbr_quality,
175 quality_preset,
176 gop_size: 0, // caller fills from keyframe_interval
177 aq_mode: 1, // CAQ — content-adaptive QP on
178 tiles_per_frame,
179 }
180}
181
182/// AMF CQP q-index (0..255) for a given QualityTarget. Starts from
183/// `libaom_cq × 4` and subtracts an 8-point calibration shift to
184/// compensate for VCN's compression-efficiency gap — analogous to
185/// NVENC's 3-4-point CQ shift in 0..63 space.
186///
187/// TODO(calibrate): replace with anchors from the offline VMAF pass
188/// on RDNA3 hardware. Seed values come from av1-tuning-eng's research
189/// doc §2.5 and GPUOpen AMF tuning guide.
190fn amf_q_index_for_target(target: QualityTarget) -> u8 {
191 let base = match target {
192 QualityTarget::VisuallyLossless => 72, // libaom 20 × 4 - 8
193 QualityTarget::High => 100, // libaom 27 × 4 - 8
194 QualityTarget::Standard => 120, // libaom 32 × 4 - 8
195 QualityTarget::Low => 144, // libaom 38 × 4 - 8
196 QualityTarget::Vmaf(v) => vmaf_to_amf_q_index(v),
197 };
198 base.min(255) as u8
199}
200
201/// Anchors for AMF q-index interpolation when a caller passes an
202/// explicit Vmaf target. Descending VMAF → ascending q-index.
203const AMF_Q_INDEX_ANCHORS: &[(i32, i32)] = &[
204 (100, 50), // asymptote below VisuallyLossless
205 (98, 72),
206 (95, 100),
207 (90, 120),
208 (85, 144),
209 (70, 200),
210];
211
212fn vmaf_to_amf_q_index(vmaf: u8) -> u16 {
213 piecewise_quality(vmaf, AMF_Q_INDEX_ANCHORS, 0, 255) as u16
214}
215
216/// AMF anchors: AMF's QVBR quality scale is 1..100 (higher = better).
217/// Calibrated from research §2.5 against libaom at matched VMAF.
218const AMF_QVBR_ANCHORS: &[(i32, i32)] =
219 &[(100, 100), (98, 95), (95, 85), (90, 70), (85, 55), (70, 35)];
220
221fn vmaf_to_qvbr_quality(vmaf: u8) -> u8 {
222 piecewise_quality(vmaf, AMF_QVBR_ANCHORS, 1, 100)
223}
224
225// ─── QSV ─────────────────────────────────────────────────────────
226
227/// Derive Intel QSV AV1 params for a given quality target + speed tier +
228/// resolution.
229///
230/// oneVPL exposes two sensible modes for quality-driven encoding: ICQ
231/// (intelligent constant quality, 1..51 for AV1 — 1=best) and CQP
232/// (constant q-index, 0..255). ICQ is the default; CQP is the archival
233/// path. ICQ quality maps near-linearly to libaom cq-level at the range
234/// we care about (research §2.6, calibrated from Intel's public
235/// oneVPL sample_encode benchmarks).
236pub fn qsv_av1_params(
237 target: QualityTarget,
238 tier: SpeedTier,
239 width: u32,
240 height: u32,
241) -> QsvAv1Params {
242 // ICQ quality 1..51; 1=best. QSV maps AV1's native 0..63 CQ range
243 // into the 0..51 scale for API parity with H.264/HEVC (oneVPL
244 // idiosyncrasy), so we scale libaom cq-level by 51/63 ≈ 0.81.
245 // VL: libaom 20 × 51/63 ≈ 16
246 // Hi: libaom 27 × 51/63 ≈ 22
247 // Std: libaom 32 × 51/63 ≈ 26
248 // Low: libaom 38 × 51/63 ≈ 31
249 let icq_quality = match target {
250 QualityTarget::VisuallyLossless => 16,
251 QualityTarget::High => 22,
252 QualityTarget::Standard => 26,
253 QualityTarget::Low => 31,
254 QualityTarget::Vmaf(v) => vmaf_to_qsv_icq(v),
255 };
256 // CQP q-index for archival — QSV uses the full AV1 0..255 range
257 // via `mfx.QPI`. Same 4× libaom mapping as rav1e/AMF.
258 let libaom_cq = libaom_cq_for_target(target);
259 let qp_i = (libaom_cq as u16 * 4).min(255);
260 let qp_p = qp_i.saturating_add(8).min(255);
261
262 // oneVPL TargetUsage: 1=best quality, 7=best speed. Per
263 // av1-tuning-eng review: Archive=1, Standard=4, Draft=6
264 // (not 7 — 6 still leaves headroom for the driver's
265 // "adaptive speed" selections without falling into the explicit
266 // "worst-quality" bucket).
267 let target_usage = match tier {
268 SpeedTier::Archive => 1,
269 SpeedTier::Standard => 4,
270 SpeedTier::Draft => 6,
271 };
272
273 let rc_mode = match target {
274 QualityTarget::VisuallyLossless => QsvRateControl::Cqp,
275 _ => QsvRateControl::Icq,
276 };
277
278 let (num_tile_columns, num_tile_rows) = tile_grid_hw(width, height);
279
280 QsvAv1Params {
281 rc_mode,
282 icq_quality,
283 qp_i,
284 qp_p,
285 target_usage,
286 gop_pic_size: 0, // caller fills from keyframe_interval
287 num_tile_columns: num_tile_columns as u8,
288 num_tile_rows: num_tile_rows as u8,
289 // AV1 QSV encode is VDENC (low-power) only on Arc / Meteor Lake+.
290 low_power: MFX_CODINGOPTION_ON,
291 }
292}
293
294/// QSV ICQ scale is 1..51 (lower = better), inverted from AMF's QVBR.
295/// Anchor table reflects Intel's public oneVPL sample benchmarks.
296const QSV_ICQ_ANCHORS: &[(i32, i32)] =
297 &[(100, 8), (98, 18), (95, 24), (90, 30), (85, 36), (70, 48)];
298
299fn vmaf_to_qsv_icq(vmaf: u8) -> u16 {
300 piecewise_quality(vmaf, QSV_ICQ_ANCHORS, 1, 51) as u16
301}