jxl_encoder/effort.rs
1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Centralized effort-derived encoder decisions.
6//!
7//! Every effort-gated decision in the encoder reads from an [`EffortProfile`]
8//! instead of checking `if effort >= N` inline. Construct once from
9//! `(effort, mode)`, then pass to all subsystems.
10
11use crate::api::EncoderMode;
12use crate::entropy_coding::lz77::Lz77Method;
13
/// Raw entropy multipliers, one per transform family, consumed by the AC
/// strategy cost model.
///
/// A larger multiplier makes the corresponding transform look more expensive
/// (so it is picked less often); a smaller one makes it more attractive. The
/// 8x8-class values are divided by the DCT8 value before use, which pins DCT8
/// itself at 1.0. Transforms larger than 8x8 use their raw values directly.
///
/// [`Self::reference`] carries the libjxl defaults (`enc_ac_strategy.cc:584`,
/// `kTransforms8x8[i].entropy_mul`). [`Self::experimental`] carries the
/// retuned values from libjxl PR #4506 (Jon Sneyers, VarDCT cost tuning).
///
/// The type is `#[non_exhaustive]` so strategies added on the libjxl side can
/// be mirrored here without a breaking change: obtain a table from one of the
/// two constructors, then adjust individual fields.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct EntropyMulTable {
    /// Multiplier for plain DCT8; the normalization base for every 8x8-class
    /// transform.
    /// Reference: 0.8 (libjxl `enc_ac_strategy.cc:357`, `kTransforms8x8[0].entropy_mul`).
    pub dct8: f32,

    /// Multiplier for DCT4x4 (an 8x8 block split into four 4x4 sub-blocks).
    /// Reference: 1.08. Experimental: 0.88 (PR #4506, ~19% reduction).
    /// A lower value steers textured/detailed regions (screenshots, text)
    /// towards DCT4x4.
    pub dct4x4: f32,

    /// Multiplier for the half-block transforms DCT4x8 / DCT8x4 (edges, detail).
    /// Reference: 0.859316 (libjxl `enc_ac_strategy.cc`).
    pub dct4x8: f32,

    /// Multiplier for the identity strategy (pixel copy, no transform).
    /// Reference: 1.0428. Experimental: 0.88 (PR #4506, ~16% reduction).
    /// A lower value steers flat/noisy regions towards identity blocks.
    pub identity: f32,

    /// Multiplier for DCT2x2 (2x2 Hadamard-like transform).
    /// Reference: 0.95 (libjxl `enc_ac_strategy.cc`).
    pub dct2x2: f32,

    /// Multiplier for AFV (Adaptive Frequency Variable, corner DCT).
    /// Reference: 0.8177949 (often quoted rounded as 0.818).
    /// Experimental: 0.75 (PR #4506, ~8% reduction).
    /// A lower value steers edge blocks with mixed content towards AFV.
    pub afv: f32,

    /// Multiplier for DCT16x8 / DCT8x16. From this size upwards the raw value
    /// is used; it is not normalized by DCT8.
    /// Reference: 1.21 (libjxl `enc_ac_strategy.cc`).
    pub dct16x8: f32,

    /// Multiplier for DCT16x16.
    /// Reference: 1.34 (libjxl `enc_ac_strategy.cc`).
    pub dct16x16: f32,

    /// Multiplier for DCT16x32 / DCT32x16.
    /// Reference: 1.49 (libjxl `enc_ac_strategy.cc`).
    pub dct16x32: f32,

    /// Multiplier for DCT32x32.
    /// Reference: 1.48 (libjxl `enc_ac_strategy.cc`).
    pub dct32x32: f32,

    /// Multiplier for DCT64x32 / DCT32x64.
    /// Reference: 2.25 (libjxl `enc_ac_strategy.cc`).
    pub dct64x32: f32,

    /// Multiplier for DCT64x64.
    /// Reference: 2.25 (libjxl `enc_ac_strategy.cc`).
    pub dct64x64: f32,
}
81
82impl EntropyMulTable {
83 /// Default values matching libjxl `enc_ac_strategy.cc:584`.
84 pub fn reference() -> Self {
85 Self {
86 dct8: 0.8,
87 dct4x4: 1.08,
88 dct4x8: 0.859_316_37,
89 identity: 1.0428,
90 dct2x2: 0.95,
91 afv: 0.817_794_9,
92 dct16x8: 1.21,
93 dct16x16: 1.34,
94 dct16x32: 1.49,
95 dct32x32: 1.48,
96 dct64x32: 2.25,
97 dct64x64: 2.25,
98 }
99 }
100
101 /// Experimental values from libjxl PR #4506 (Jon Sneyers, VarDCT cost tuning).
102 ///
103 /// Changes vs reference:
104 /// - dct4x4: 1.08 → 0.88 (~19% reduction) — favor detail-preserving 4x4 sub-blocks
105 /// - identity: 1.0428 → 0.88 (~16% reduction) — favor pixel-copy for flat regions
106 /// - afv: 0.818 → 0.75 (~8% reduction) — favor corner DCT for edge blocks
107 pub fn experimental() -> Self {
108 Self {
109 dct4x4: 0.88,
110 identity: 0.88,
111 afv: 0.75,
112 ..Self::reference()
113 }
114 }
115}
116
117/// All effort-derived encoder decisions, centralized.
118///
119/// Replaces scattered `if effort >= N` checks throughout the codebase.
120/// Construct once from (effort, mode, encoding path), pass to all subsystems.
121///
122/// **Field categories**:
123/// - **Effort-derived**: changes value across effort levels (e.g., `nb_rcts_to_try`,
124/// `tree_max_buckets`, `butteraugli_iters`).
125/// - **Tuning constants**: same value at every effort in the reference profile,
126/// mode-dependent in experimental (e.g., `k_favor_2x2`, `k_info_loss_mul_base`,
127/// `entropy_mul_table`, `k8x8` etc.). The picker can dial these independently
128/// of effort.
129///
130/// `#[non_exhaustive]` so we can grow the field set as the picker discovers new
131/// useful knobs without breaking external `EffortProfile { ... }` constructions.
132/// Construct via [`Self::lossy`] or [`Self::lossless`] and mutate fields as needed.
133#[derive(Clone, Debug)]
134#[non_exhaustive]
135pub struct EffortProfile {
136 /// The raw effort level (1–10).
137 pub effort: u8,
138
139 // ─── Feature flags ───────────────────────────────────────────────────
140 /// Use ANS entropy coding instead of Huffman.
141 pub use_ans: bool,
142 /// Use two-pass mode with optimized entropy codes.
143 pub optimize_codes: bool,
144 /// Use custom coefficient ordering (AC scan order from statistics).
145 pub custom_orders: bool,
146 /// Enable gaborish inverse pre-filter.
147 pub gaborish: bool,
148 /// Enable pixel-domain loss in AC strategy selection.
149 pub pixel_domain_loss: bool,
150 /// Enable error diffusion in AC quantization.
151 pub error_diffusion: bool,
152 /// Enable patches/dictionary detection.
153 pub patches: bool,
154 /// Enable content-adaptive MA tree learning (modular path).
155 pub tree_learning: bool,
156 /// Enable LZ77 backward references in entropy coding.
157 pub lz77: bool,
158 /// LZ77 method when lz77 is enabled.
159 pub lz77_method: Lz77Method,
160 /// Number of butteraugli quantization loop iterations.
161 pub butteraugli_iters: u32,
162
163 // ─── AC strategy search ──────────────────────────────────────────────
164 /// Enable adaptive AC strategy selection (multi-block transforms).
165 pub ac_strategy_enabled: bool,
166 /// Try DCT16x16/DCT16x8/DCT8x16 transforms (multi-block 16x16 merges).
167 pub try_dct16: bool,
168 /// Try DCT32x32/DCT32x16/DCT16x32 transforms.
169 pub try_dct32: bool,
170 /// Try DCT64x64/DCT64x32/DCT32x64 transforms.
171 pub try_dct64: bool,
172 /// Try DCT4x8/DCT8x4/DCT4x4/AFV transforms (effort >= 6 in libjxl).
173 pub try_dct4x8_afv: bool,
174 /// Enable non-aligned evaluation pass (odd-aligned 16x16 regions).
175 pub non_aligned_eval: bool,
176 /// Step size for fine-grained AC strategy search on 32x32+ blocks.
177 /// 1 = every position (effort 9+), 2 = every other (default).
178 pub fine_grained_step: u8,
179
180 // ─── VarDCT pipeline options ──────────────────────────────────────────
181 /// Apply pixel-level chromacity adjustments (effort >= 7 in libjxl).
182 pub chromacity_adjustment: bool,
183 /// Use pair-merge clustering for VarDCT entropy codes (effort >= 9 in libjxl).
184 /// When false, uses fast k-means-only clustering.
185 pub enhanced_clustering_vardct: bool,
186 /// Optimize per-histogram HybridUint configs for VarDCT entropy codes.
187 /// libjxl uses uint_method=kNone (no optimization, default {4,2,0}) at effort < 9.
188 /// The fast optimization picks non-default configs whose signaling overhead
189 /// exceeds their coding benefit on VarDCT token distributions.
190 pub optimize_uint_configs_vardct: bool,
191 /// Compute per-block dynamic EPF sharpness (effort >= 6 in libjxl).
192 pub epf_dynamic_sharpness: bool,
193 /// Recompute CfL map after initial quantization for better estimates (effort >= 7 in libjxl).
194 pub cfl_two_pass: bool,
195 /// Use Newton's method (perceptual cost model) for CfL fitting (effort >= 7 in libjxl).
196 /// When false, uses fast least-squares fitting (quadratic cost, single-pass).
197 pub cfl_newton: bool,
198 /// Newton finite-difference epsilon for CfL fitting.
199 /// Controls second-derivative accuracy. Default 1.0 (libjxl uses 100.0, which oscillates).
200 pub cfl_newton_eps: f32,
201 /// Maximum Newton iterations for CfL fitting. Default 10 (libjxl uses 20).
202 pub cfl_newton_max_iters: usize,
203
204 // ─── Quantization ────────────────────────────────────────────────────
205 /// Use adaptive (content-dependent) quant field via InitialQuantField.
206 /// When false (effort < 5), uses flat quant field = 0.79/distance.
207 /// Matches libjxl enc_heuristics.cc:1097-1128.
208 pub use_adaptive_quant: bool,
209 /// Enable per-block AdjustQuantBlockAC (effort >= 5 in libjxl).
210 pub adjust_quant_ac: bool,
211 /// Numerator for the effort-fixed q parameter used in global_scale computation.
212 /// libjxl: 0.39 at effort >= 5, 0.79 at effort < 5.
213 /// global_scale = 65536 * (initial_q_numerator / distance) / 5.0
214 pub initial_q_numerator: f32,
215 /// Fixed quantization thresholds applied per-coefficient on the Y channel
216 /// when [`Self::adjust_quant_ac`] is `false`.
217 ///
218 /// Pipeline stage: VarDCT post-DCT quantization (`vardct/transform.rs`).
219 /// The four entries gate progressively higher coefficient bands; values
220 /// below the threshold round to zero.
221 /// From libjxl `enc_group.cc:358` (`kThresholdMul` constants for low-effort path).
222 /// Lowering the entries preserves more high-frequency Y detail at the cost
223 /// of bitrate; raising flattens texture. Override when an asset class needs
224 /// different texture-vs-bitrate balance than the libjxl defaults give.
225 pub fixed_thresholds_y: [f32; 4],
226 /// Initial quantization thresholds used when [`Self::adjust_quant_ac`] is
227 /// `true` (effort >= 5). Per-block adjustment iterates from these.
228 /// From libjxl `enc_group.cc:390`.
229 /// Pipeline stage: VarDCT post-DCT quantization, prior to the
230 /// `AdjustQuantBlockAC` per-block tweak. Useful as a starting point for
231 /// pickers exploring the threshold-vs-rate frontier per content class.
232 pub adjust_thresholds: [f32; 4],
233
234 // ─── Cost model constants ────────────────────────────────────────────
235 // All five `k_*` constants below feed `vardct/ac_strategy_search.rs`
236 // (the per-8×8 cost evaluator that picks DCT8 vs DCT4x4 vs IDENTITY vs
237 // larger merges). Default values come from libjxl's reference encoder
238 // and are *the same at every effort level* — they describe the cost
239 // model itself, not the search depth. The picker / sweep harness uses
240 // them to retune the model per content class without touching effort.
241 /// kFavor2X2AtHighQuality weight (-0.4 in libjxl,
242 /// `enc_ac_strategy.cc::kFavor2X2AtHighQuality`).
243 /// Applied as `k_favor_2x2 * ((5-distance)/5)^2` to IDENTITY/DCT2X2
244 /// entropy at distance < 5. More-negative values aggressively favor
245 /// pixel-copy / 2×2 blocks at low distances; useful for screenshots /
246 /// pixel art where the default photo-tuned bias under-uses IDENTITY.
247 pub k_favor_2x2: f32,
248 /// Base penalty added to every non-DCT8 strategy's cost
249 /// (libjxl `kAvoidEntropyOfTransforms = 0.5`,
250 /// `enc_ac_strategy.cc::EvalAcStrategy`). Higher values discourage the
251 /// AC strategy search from leaving DCT8; lower values let it spread to
252 /// IDENTITY / DCT4x4 / DCT16x16 more freely.
253 pub k_avoid_transforms_base: f32,
254 /// Base multiplier on the IDCT-domain (pixel-domain) error term in
255 /// `EstimateEntropy` (libjxl 1.2, `enc_ac_strategy.cc`).
256 /// PR #4506 raised this to 1.3 for the experimental profile — heavier
257 /// weight on visible artifacts vs coefficient-domain entropy.
258 pub k_info_loss_mul_base: f32,
259 /// Base multiplier on the zero-coefficient cost term (libjxl 9.309,
260 /// `enc_ac_strategy.cc`). Increasing rewards strategies that leave
261 /// many coefficients exactly zero (boosts large-DCT use on smooth
262 /// regions). Lowering lets non-zero residuals stay cheaper.
263 pub k_zeros_mul_base: f32,
264 /// Base delta added inside the cost-model interpolation (libjxl 10.833,
265 /// `enc_ac_strategy.cc`). Acts as an "exchange rate" between rate
266 /// (entropy proxy) and distortion (info-loss term); rarely retuned
267 /// outside picker/sweep work.
268 pub k_cost_delta_base: f32,
269 /// Quantization-cost constant used when materializing the initial
270 /// quant field (libjxl 0.765, `enc_adaptive_quantization.cc`). Read by
271 /// `vardct/precomputed.rs` and `vardct/encoder.rs`. Lower values
272 /// produce a coarser initial field (less rate, more distortion);
273 /// higher refines.
274 pub k_ac_quant: f32,
275
276 // ─── Coefficient-domain multiplier constants ─────────────────────────
277 // Each tuple is `(mul1, mul2, base)` for the EstimateEntropy /
278 // info-loss formula in `vardct/ac_strategy_search.rs`. `mul1` weights
279 // the negative log-rate term, `mul2` weights the AC magnitude term,
280 // and `base` is added unconditionally. Defaults come from libjxl's
281 // `enc_ac_strategy.cc`. Mode-/effort-independent in both reference
282 // and experimental — cost-model knobs the picker can dial.
283 /// DCT8x8 coefficient-domain multiplier `(mul1, mul2, base)`.
284 /// Note: stored values include libjxl's 0.75 factor on `mul1`/`mul2`
285 /// (applied at `enc_ac_strategy.cc:790` for 8×8-class transforms).
286 pub k8x8: (f32, f32, f32),
287 /// DCT16x8 / DCT8x16 coefficient-domain multiplier `(mul1, mul2, base)`.
288 /// Larger transforms skip the 0.75 factor and use the libjxl raw values.
289 pub k16x8: (f32, f32, f32),
290 /// DCT16x16 coefficient-domain multiplier `(mul1, mul2, base)`.
291 pub k16x16: (f32, f32, f32),
292 /// DCT4x8 / DCT8x4 coefficient-domain multiplier `(mul1, mul2, base)`.
293 /// 4×N strategies share the 0.75 factor with 8×8.
294 pub k4x8: (f32, f32, f32),
295 /// DCT4x4 coefficient-domain multiplier `(mul1, mul2, base)`.
296 /// 4×4 strategies share the 0.75 factor with 8×8.
297 pub k4x4: (f32, f32, f32),
298
299 // ─── Entropy multiplier table ──────────────────────────────────────────
300 /// Per-strategy entropy multipliers for AC strategy cost model.
301 /// Controls relative preference for each transform type.
302 pub entropy_mul_table: EntropyMulTable,
303
304 // ─── Patch encoding ────────────────────────────────────────────────────
305 /// Use tree learning for patch reference frame encoding.
306 /// When true AND ref frame is large enough (>= 128×128), enables adaptive
307 /// prediction in the modular encoder for patch ref frames.
308 /// Reference: false (libjxl uses simple Gradient predictor).
309 /// Experimental: true at effort >= 7 (PR #4533 style improvement).
310 pub patch_ref_tree_learning: bool,
311
312 // ─── RCT selection ───────────────────────────────────────────────────
313 /// Number of Reversible Color Transform variants to evaluate before
314 /// committing to one (0 = skip search, use YCoCg unconditionally).
315 ///
316 /// Pipeline stage: modular pre-transform, before predictor + tree
317 /// learning (`modular/encode.rs::select_best_rct`,
318 /// `modular/frame.rs::select_best_rct_at`). Each candidate runs a
319 /// cost estimate; the cheapest wins.
320 /// Effort interaction: 0 at e<5, 4 at e5, 5 at e6, 7 at e7, 9 at e8,
321 /// 19 at e9+ (libjxl `kSquirrel`/`kKitten`/`kTortoise` schedule).
322 /// Override when a specific content class (e.g., film stills) has a
323 /// known-best RCT and the search is wasted compute, or when sweeping
324 /// to discover content-specific defaults.
325 pub nb_rcts_to_try: u8,
326
327 // ─── WP parameter search ───────────────────────────────────────────────
328 /// Number of weighted-predictor parameter sets to try when tuning the
329 /// modular WP per channel (0 = use the libjxl default parameters
330 /// without searching).
331 ///
332 /// Pipeline stage: modular predictor selection
333 /// (`modular/predictor.rs::find_best_wp_params`, called from
334 /// `modular/section.rs`, `modular/frame.rs`, `modular/encode.rs`).
335 /// Effort interaction: 0 at e<8, 2 at e8, 5 at e9+. The search is
336 /// expensive (each candidate runs a cost estimate over all WP-eligible
337 /// channels), which is why libjxl gates it behind `kKitten`/`kTortoise`.
338 /// Override to force the search on at lower effort (e.g., when a picker
339 /// wants e6-quality bytes with WP-fitted parameters), or off at e9 for
340 /// faster sweeps.
341 pub wp_num_param_sets: u8,
342
343 // ─── Tree learning parameters ────────────────────────────────────────
344 // Read by `modular/tree_learn.rs::TreeLearningParams::from_profile`.
345 // These describe the *shape* of the MA tree — wider trees split on
346 // more properties / finer buckets, deeper trees use lower thresholds,
347 // and the sampling caps trade tree-learning compute for accuracy.
348 /// Number of MA-tree decision properties to evaluate per split.
349 /// Capped to the order length defined in `modular/tree_learn.rs`
350 /// (15 without `group_id`, 16 with).
351 /// Effort interaction: 3 at e<=4, 4 at e5, 5 at e6, 7 at e7, 10 at e8,
352 /// 16 at e9+. More properties = better trees but quadratic cost in
353 /// `LearnTree`. Override to retune the speed/quality knee per content.
354 pub tree_num_properties: u8,
355 /// Maximum number of quantization buckets per property when building
356 /// the histogram for tree splits. Matches libjxl
357 /// `enc_modular.cc:556-590` `max_property_values` per speed tier.
358 /// Effort interaction: 32 at e<=4, 48 at e5, 64 at e6, 96 at e7,
359 /// 128 at e8, 256 at e9+. Higher = finer thresholds at higher learning
360 /// cost. Override when a corpus benefits from coarser/finer splits
361 /// than the libjxl tier table predicts.
362 pub tree_max_buckets: u16,
363 /// Base entropy-cost threshold a candidate split must beat to be
364 /// accepted (libjxl `75 + 14 * speed_tier` in
365 /// `enc_modular.cc::LearnTreeHeuristics`).
366 /// Effort interaction: 173 at e<=1 (speed_tier=9), 117 at e5 (5),
367 /// 75 at e9+ (1). Lower threshold = more splits = larger tree. Override
368 /// to bias the tree shallower (cheaper decode) or deeper (better fit).
369 pub tree_threshold_base: f32,
370 /// Hard cap on samples drawn for tree learning when set; `0` defers
371 /// to [`Self::tree_sample_fraction`].
372 /// Read by `modular/tree_learn.rs::sample_count_for_profile`.
373 /// Effort interaction: 65,000 at e<=4 (cheap, fixed budget), 0 at e>=5
374 /// (let the fraction-based path scale with image size). Override to
375 /// fix the tree-learning compute regardless of input pixels.
376 pub tree_max_samples_fixed: u32,
377 /// Fraction of total pixels to sample for tree learning when
378 /// [`Self::tree_max_samples_fixed`] is `0`. Floor of 65,536 samples.
379 /// Read by `modular/tree_learn.rs::sample_count_for_profile`.
380 /// Effort interaction: 0.15 at e<=4, 0.25 at e5, 0.35 at e6, 0.5 at e7,
381 /// 0.55 at e8, 0.65 at e9+ (libjxl PR #4236). Higher fractions improve
382 /// tree fit (especially on large images) at proportional cost. Override
383 /// to densify sampling on large images at moderate effort, or thin
384 /// sampling for fast sweeps at high effort.
385 pub tree_sample_fraction: f32,
386}
387
388impl EffortProfile {
389 /// Create an effort profile for lossy (VarDCT) encoding.
390 pub fn lossy(effort: u8, mode: EncoderMode) -> Self {
391 let effort = effort.clamp(1, 10);
392 match mode {
393 EncoderMode::Reference => Self::lossy_reference(effort),
394 EncoderMode::Experimental => Self::lossy_experimental(effort),
395 }
396 }
397
398 /// Create an effort profile for lossless (modular) encoding.
399 pub fn lossless(effort: u8, mode: EncoderMode) -> Self {
400 let effort = effort.clamp(1, 10);
401 match mode {
402 EncoderMode::Reference => Self::lossless_reference(effort),
403 EncoderMode::Experimental => Self::lossless_experimental(effort),
404 }
405 }
406
407 fn lossy_reference(effort: u8) -> Self {
408 let speed_tier = 10u8.saturating_sub(effort);
409
410 Self {
411 effort,
412
413 // ── Feature flags ──
414 use_ans: effort >= 3,
415 optimize_codes: effort >= 3,
416 custom_orders: effort >= 4,
417 gaborish: effort >= 5,
418 pixel_domain_loss: effort >= 5,
419 error_diffusion: false, // libjxl accepts param but never uses it
420 patches: effort >= 7,
421 tree_learning: effort >= 7,
422 // libjxl does NOT use LZ77 for VarDCT DC or AC at effort < 9.
423 // DC: ForModular() → lz77_method = kNone (modular_mode=false).
424 // AC: HistogramParams(kSquirrel, num_ctx) → lz77_method = kNone
425 // (enc_frame.cc overrides since tier > kTortoise).
426 // Only kTortoise (effort 9+) enables LZ77 for VarDCT streams.
427 lz77: effort >= 9,
428 lz77_method: match effort {
429 0..=8 => Lz77Method::Rle,
430 _ => Lz77Method::Optimal,
431 },
432 butteraugli_iters: match effort {
433 // libjxl runs FindBestQuantization unconditionally for lossy
434 // encoding. Gated at speed_tier <= kKitten (effort >= 8) in libjxl
435 // (enc_adaptive_quantization.cc:1282). kDefaultButteraugliIters=2,
436 // kMaxButteraugliIters=4 for kTortoise (effort 9+).
437 0..=7 => 0,
438 8 => 2,
439 _ => 4,
440 },
441
442 // ── AC strategy search ──
443 ac_strategy_enabled: effort >= 5,
444 try_dct16: effort >= 5,
445 try_dct32: effort >= 5,
446 try_dct64: effort >= 7,
447 try_dct4x8_afv: effort >= 6,
448 non_aligned_eval: effort >= 6,
449 fine_grained_step: if effort >= 9 { 1 } else { 2 },
450
451 // ── VarDCT pipeline ──
452 chromacity_adjustment: effort >= 7,
453 enhanced_clustering_vardct: effort >= 9,
454 optimize_uint_configs_vardct: effort >= 9,
455 epf_dynamic_sharpness: effort >= 6,
456 cfl_two_pass: effort >= 7,
457 cfl_newton: effort >= 7,
458 cfl_newton_eps: jxl_simd::NEWTON_EPS_DEFAULT,
459 cfl_newton_max_iters: jxl_simd::NEWTON_MAX_ITERS_DEFAULT,
460
461 // ── Quantization ──
462 use_adaptive_quant: effort >= 5,
463 adjust_quant_ac: effort >= 5,
464 initial_q_numerator: if effort >= 5 { 0.39 } else { 0.79 },
465 fixed_thresholds_y: [0.56, 0.62, 0.62, 0.62],
466 adjust_thresholds: [0.58, 0.64, 0.64, 0.64],
467
468 // ── Cost model constants (from libjxl) ──
469 k_favor_2x2: -0.4,
470 k_avoid_transforms_base: 0.5,
471 k_info_loss_mul_base: 1.2,
472 k_zeros_mul_base: 9.308_906,
473 k_cost_delta_base: 10.833_273,
474 k_ac_quant: 0.765,
475
476 // ── Coefficient-domain multipliers ──
477 // Note: k8x8 mul1 has 0.75 factor applied (libjxl enc_ac_strategy.cc:790)
478 k8x8: (-0.55 * 0.75, 1.073_575_8 * 0.75, 1.4),
479 k16x8: (-0.55, 0.901_958_8, 1.6),
480 k16x16: (-0.65, 0.88, 1.8),
481 k4x8: (-0.50 * 0.75, 0.88, 1.3),
482 k4x4: (-0.45 * 0.75, 0.85, 1.2),
483
484 // ── Entropy multiplier table ──
485 entropy_mul_table: EntropyMulTable::reference(),
486
487 // ── Patch encoding ──
488 patch_ref_tree_learning: false,
489
490 // ── RCT selection ──
491 nb_rcts_to_try: match effort {
492 0..=4 => 0,
493 5 => 4,
494 6 => 5,
495 7 => 7,
496 8 => 9,
497 _ => 19,
498 },
499
500 // ── WP parameter search ──
501 wp_num_param_sets: match effort {
502 0..=7 => 0,
503 8 => 2,
504 _ => 5,
505 },
506
507 // ── Tree learning ──
508 tree_num_properties: Self::tree_num_properties_for(effort),
509 tree_max_buckets: Self::tree_max_buckets_for(effort),
510 tree_threshold_base: 75.0 + 14.0 * speed_tier as f32,
511 tree_max_samples_fixed: if effort <= 4 { 65_000 } else { 0 },
512 // Effort-scaled nb_repeats matching libjxl PR #4236
513 tree_sample_fraction: Self::tree_sample_fraction_for(effort),
514 }
515 }
516
517 fn lossless_reference(effort: u8) -> Self {
518 let speed_tier = 10u8.saturating_sub(effort);
519
520 Self {
521 effort,
522
523 // ── Feature flags ──
524 use_ans: effort >= 3,
525 optimize_codes: effort >= 2,
526 custom_orders: effort >= 3,
527 gaborish: false, // N/A for lossless
528 pixel_domain_loss: false, // N/A for lossless
529 error_diffusion: false, // N/A for lossless
530 patches: effort >= 5,
531 tree_learning: effort >= 7,
532 lz77: effort >= 7,
533 lz77_method: match effort {
534 0..=7 => Lz77Method::Rle,
535 8 => Lz77Method::Greedy,
536 _ => Lz77Method::Optimal,
537 },
538 butteraugli_iters: 0, // N/A for lossless
539
540 // ── AC strategy (N/A for lossless) ──
541 ac_strategy_enabled: false,
542 try_dct16: false,
543 try_dct32: false,
544 try_dct64: false,
545 try_dct4x8_afv: false,
546 non_aligned_eval: false,
547 fine_grained_step: 2,
548
549 // ── VarDCT pipeline (N/A for lossless) ──
550 chromacity_adjustment: false,
551 enhanced_clustering_vardct: false,
552 optimize_uint_configs_vardct: false, // N/A for lossless
553 epf_dynamic_sharpness: false,
554 cfl_two_pass: false,
555 cfl_newton: false,
556 cfl_newton_eps: jxl_simd::NEWTON_EPS_DEFAULT,
557 cfl_newton_max_iters: jxl_simd::NEWTON_MAX_ITERS_DEFAULT,
558
559 // ── Quantization (N/A for lossless) ──
560 use_adaptive_quant: false,
561 adjust_quant_ac: false,
562 initial_q_numerator: 0.39,
563 fixed_thresholds_y: [0.56, 0.62, 0.62, 0.62],
564 adjust_thresholds: [0.58, 0.64, 0.64, 0.64],
565
566 // ── Cost model constants (used for tree learning cost estimates) ──
567 k_favor_2x2: -0.4,
568 k_avoid_transforms_base: 0.5,
569 k_info_loss_mul_base: 1.2,
570 k_zeros_mul_base: 9.308_906,
571 k_cost_delta_base: 10.833_273,
572 k_ac_quant: 0.765,
573
574 // ── Coefficient-domain multipliers (N/A for lossless) ──
575 k8x8: (-0.55 * 0.75, 1.073_575_8 * 0.75, 1.4),
576 k16x8: (-0.55, 0.901_958_8, 1.6),
577 k16x16: (-0.65, 0.88, 1.8),
578 k4x8: (-0.50 * 0.75, 0.88, 1.3),
579 k4x4: (-0.45 * 0.75, 0.85, 1.2),
580
581 // ── Entropy multiplier table (N/A for lossless, but struct requires it) ──
582 entropy_mul_table: EntropyMulTable::reference(),
583
584 // ── Patch encoding ──
585 patch_ref_tree_learning: false,
586
587 // ── RCT selection ──
588 nb_rcts_to_try: match effort {
589 0..=4 => 0,
590 5 => 4,
591 6 => 5,
592 7 => 7,
593 8 => 9,
594 _ => 19,
595 },
596
597 // ── WP parameter search ──
598 wp_num_param_sets: match effort {
599 0..=7 => 0,
600 8 => 2,
601 _ => 5,
602 },
603
604 // ── Tree learning ──
605 tree_num_properties: Self::tree_num_properties_for(effort),
606 tree_max_buckets: Self::tree_max_buckets_for(effort),
607 tree_threshold_base: 75.0 + 14.0 * speed_tier as f32,
608 tree_max_samples_fixed: if effort <= 4 { 65_000 } else { 0 },
609 // Effort-scaled nb_repeats matching libjxl PR #4236
610 tree_sample_fraction: Self::tree_sample_fraction_for(effort),
611 }
612 }
613
614 /// Experimental lossy profile with tuning from libjxl PRs and our own improvements.
615 ///
616 /// Divergences from reference (documented per-field):
617 /// - `k_info_loss_mul_base`: 1.2 → 1.3 (PR #4506, +8% pixel-domain loss weight)
618 /// - `entropy_mul_table`: PR #4506 values (favor DCT4x4, Identity, AFV)
619 /// - `enhanced_clustering_vardct`: enabled at effort >= 7 (was e9+)
620 /// - `patch_ref_tree_learning`: true at effort >= 7 (tree learning for patch ref frames)
621 fn lossy_experimental(effort: u8) -> Self {
622 let mut p = Self::lossy_reference(effort);
623
624 // PR #4506 (Jon Sneyers): +8% weight on pixel-domain loss improves visual quality
625 // on detailed content. The info_loss_mul scales the IDCT-domain error term in
626 // EstimateEntropy, making the cost model more sensitive to visible artifacts.
627 // Reference: 1.2 (libjxl enc_ac_strategy.cc). Experimental: 1.3.
628 p.k_info_loss_mul_base = 1.3;
629
630 // PR #4506 entropy multiplier rebalancing: favor small/detail-preserving transforms.
631 p.entropy_mul_table = EntropyMulTable::experimental();
632
633 // Pair-merge histogram clustering helps VarDCT at effort 7+ (not just e9+).
634 // The ANS header cost savings from merging similar distributions outweigh the
635 // slight data cost increase from sharing code tables across contexts.
636 if effort >= 7 {
637 p.enhanced_clustering_vardct = true;
638 }
639
640 // Tree learning for patch reference frames: adapts prediction to packed glyphs
641 // instead of using fixed Gradient predictor. Significant on large ref frames
642 // (screenshots with many unique patterns). Gated at effort >= 7.
643 if effort >= 7 {
644 p.patch_ref_tree_learning = true;
645 }
646
647 p
648 }
649
650 fn lossless_experimental(effort: u8) -> Self {
651 Self::lossless_reference(effort)
652 }
653
654 fn tree_num_properties_for(effort: u8) -> u8 {
655 match effort {
656 0..=4 => 3,
657 5 => 4,
658 6 => 5,
659 7 => 7,
660 8 => 10,
661 // 16 = all properties including group_id.
662 // Non-squeeze array has 15 elements, so .min(15) caps correctly.
663 // Squeeze array has 16 elements (group_id always included).
664 _ => 16,
665 }
666 }
667
668 /// Effort-scaled pixel sampling fraction for tree learning (libjxl PR #4236).
669 fn tree_sample_fraction_for(effort: u8) -> f32 {
670 match effort {
671 0..=4 => 0.15,
672 5 => 0.25,
673 6 => 0.35,
674 7 => 0.5,
675 8 => 0.55,
676 _ => 0.65,
677 }
678 }
679
680 fn tree_max_buckets_for(effort: u8) -> u16 {
681 // Matches libjxl enc_modular.cc:556-590 max_property_values by speed_tier.
682 match effort {
683 0..=4 => 32, // <=Cheetah
684 5 => 48, // Hare
685 6 => 64, // Wombat
686 7 => 96, // Squirrel
687 8 => 128, // Kitten
688 _ => 256, // Tortoise
689 }
690 }
691}
692
693// ─────────────────────────────────────────────────────────────────────────
694// Public expert surface — segmented Lossy / Lossless internal-param structs
695// ─────────────────────────────────────────────────────────────────────────
696//
697// `LossyInternalParams` and `LosslessInternalParams` are the public picker /
698// sweep escape hatch (gated behind `__expert`). They split the internal
699// [`EffortProfile`] into two type-disjoint surfaces — one per encode mode —
700// so callers cannot accidentally hand the lossy encoder a knob that only
701// affects modular output, and vice-versa. The type system enforces
702// mode-correctness instead of relying on documentation.
703//
704// Each `Some(_)` field overrides the corresponding `EffortProfile` field
705// the lossy / lossless code path actually reads. Fields left at `None` keep
706// the (effort, mode)-derived default. This matches the segmented
707// `InternalParams` pattern used by zenavif / zenwebp / zenravif.
708
/// Optional overrides for the **lossy (VarDCT)** encode path, intended for
/// pickers and parameter sweeps.
///
/// Install with [`crate::api::LossyConfig::with_internal_params`]. Every
/// field is an `Option`: `Some(value)` replaces the matching effort-derived
/// default, while `None` leaves that default in place. The struct is
/// `#[non_exhaustive]` so further knobs can be added later without a
/// breaking change.
///
/// Only knobs that the lossy encoder reads through `profile.X` appear here
/// (verified against `vardct/encoder.rs`, `vardct/ac_strategy_search.rs`,
/// `vardct/transform.rs`, `vardct/precomputed.rs`, and
/// `vardct/bitstream.rs`). Modular-only knobs (RCT search, WP parameter
/// scan, tree-learning shape) are on [`LosslessInternalParams`] instead:
/// VarDCT's DC frame uses a fixed Gradient predictor, so those knobs do not
/// affect lossy bytes.
#[cfg(feature = "__expert")]
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct LossyInternalParams {
    /// Whether AC strategy search tries the DCT16x16 / DCT16x8 / DCT8x16
    /// transforms.
    /// Default at effort 7: `true`. `false` forces no 16×16-class merges.
    pub try_dct16: Option<bool>,

    /// Whether the DCT32x32 / DCT32x16 / DCT16x32 transforms are tried.
    /// Default at effort 7: `true`. `false` forces no 32×32-class merges.
    pub try_dct32: Option<bool>,

    /// Whether the DCT64x64 / DCT64x32 / DCT32x64 transforms are tried.
    /// Default at effort 7: `true`. `false` forces no 64×64-class merges.
    pub try_dct64: Option<bool>,

    /// Whether the DCT4x8 / DCT8x4 / DCT4x4 / AFV transforms are tried.
    /// Default at effort 6+: `true`. `false` restricts the search to
    /// 8×8-or-larger transforms.
    pub try_dct4x8_afv: Option<bool>,

    /// Stride of the fine-grained AC strategy search on 32×32+ blocks.
    /// `1` evaluates every position (effort 9+); `2` evaluates every other
    /// position (the default).
    pub fine_grained_step: Option<u8>,

    /// Base multiplier applied to the IDCT-domain (pixel-domain) error term
    /// in `EstimateEntropy`. Reference: 1.2 (libjxl). Experimental: 1.3
    /// (PR #4506). Raising it weights visible artifacts more heavily
    /// relative to coefficient-domain entropy.
    pub k_info_loss_mul_base: Option<f32>,

    /// Per-strategy entropy multipliers for the AC strategy cost model;
    /// they set the relative preference for each transform type.
    pub entropy_mul_table: Option<EntropyMulTable>,

    /// Whether the CfL map is recomputed after initial quantization to get
    /// better estimates.
    /// Default at effort 7+: `true`.
    pub cfl_two_pass: Option<bool>,

    /// Whether pixel-level chromacity adjustments are applied.
    /// Default at effort 7+: `true`. `false` skips the per-pixel chromacity
    /// nudges.
    pub chromacity_adjustment: Option<bool>,

    /// Whether patch reference frames are encoded with tree learning rather
    /// than the fixed Gradient predictor.
    /// Reference: `false`. Experimental at effort 7+: `true`. Significant on
    /// screenshots / packed glyph patches.
    pub patch_ref_tree_learning: Option<bool>,

    /// Whether AC strategy search runs the non-aligned evaluation pass
    /// (odd-aligned 16×16 regions).
    /// Default at effort 6+: `true`. `false` halves the search depth.
    pub non_aligned_eval: Option<bool>,

    /// Whether VarDCT entropy codes use pair-merge clustering.
    /// `true` in reference mode at effort 9+ and in experimental mode at
    /// effort 7+. With `false`, fast k-means-only clustering is used
    /// (cheaper, slightly larger codes).
    pub enhanced_clustering_vardct: Option<bool>,

    /// Quantization-cost constant used when the initial quant field is
    /// materialized (libjxl 0.765, `enc_adaptive_quantization.cc`).
    /// Lower values give a coarser initial field (less rate, more
    /// distortion); higher values refine it.
    pub k_ac_quant: Option<f32>,
}
786
/// Optional overrides for the **lossless (modular)** encode path, intended
/// for pickers and parameter sweeps.
///
/// Install with [`crate::api::LosslessConfig::with_internal_params`]. Every
/// field is an `Option`: `Some(value)` replaces the matching effort-derived
/// default, while `None` leaves that default in place. The struct is
/// `#[non_exhaustive]` so further knobs can be added later without a
/// breaking change.
///
/// These are the modular-path knobs read through `profile.X` in
/// `modular/encode.rs`, `modular/frame.rs`, `modular/section.rs`,
/// `modular/predictor.rs`, and `modular/tree_learn.rs`. AC-strategy and CfL
/// knobs are on [`LossyInternalParams`].
#[cfg(feature = "__expert")]
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct LosslessInternalParams {
    /// How many Reversible Color Transform variants are evaluated before
    /// one is committed (0 = skip the search and use YCoCg unconditionally).
    /// Effort interaction: 0 at e<5, 4 at e5, 5 at e6, 7 at e7, 9 at e8,
    /// 19 at e9+ (libjxl `kSquirrel`/`kKitten`/`kTortoise` schedule).
    pub nb_rcts_to_try: Option<u8>,

    /// How many weighted-predictor parameter sets are tried per WP-eligible
    /// channel (0 = use the libjxl defaults without searching).
    /// Effort interaction: 0 at e<8, 2 at e8, 5 at e9+.
    pub wp_num_param_sets: Option<u8>,

    /// Upper bound on quantization buckets per property when the histogram
    /// for tree splits is built.
    /// Effort interaction: 32 at e<=4, 48 at e5, 64 at e6, 96 at e7,
    /// 128 at e8, 256 at e9+. Higher = finer thresholds at higher cost.
    pub tree_max_buckets: Option<u16>,

    /// How many MA-tree decision properties are evaluated per split.
    /// Effort interaction: 3 at e<=4, 4 at e5, 5 at e6, 7 at e7, 10 at e8,
    /// 16 at e9+.
    pub tree_num_properties: Option<u8>,

    /// Base entropy-cost threshold that a candidate split has to beat in
    /// order to be accepted (libjxl `75 + 14 * speed_tier`).
    /// Lower = more splits = larger tree.
    pub tree_threshold_base: Option<f32>,

    /// Share of the total pixels sampled for tree learning (applies when
    /// `tree_max_samples_fixed` is `0`). Floor of 65,536 samples.
    /// Effort interaction: 0.15 at e<=4 ramping to 0.65 at e9+
    /// (libjxl PR #4236).
    pub tree_sample_fraction: Option<f32>,

    /// Hard cap on the samples drawn for tree learning when set; `0` defers
    /// to [`Self::tree_sample_fraction`].
    /// Effort interaction: 65,000 at e<=4, 0 at e>=5.
    pub tree_max_samples_fixed: Option<u32>,
}
840
#[cfg(feature = "__expert")]
impl LossyInternalParams {
    /// Overlay every populated (`Some`) override onto `profile`; fields left
    /// at `None` keep whatever value `profile` already holds.
    pub(crate) fn apply_to(self, profile: &mut EffortProfile) {
        // Write `requested` into `slot` only when an override was supplied.
        fn overlay<T>(slot: &mut T, requested: Option<T>) {
            if let Some(value) = requested {
                *slot = value;
            }
        }

        // Exhaustive destructuring: a field added to the struct but not
        // forwarded below becomes a compile error rather than a silent no-op.
        let Self {
            try_dct16,
            try_dct32,
            try_dct64,
            try_dct4x8_afv,
            fine_grained_step,
            k_info_loss_mul_base,
            entropy_mul_table,
            cfl_two_pass,
            chromacity_adjustment,
            patch_ref_tree_learning,
            non_aligned_eval,
            enhanced_clustering_vardct,
            k_ac_quant,
        } = self;

        overlay(&mut profile.try_dct16, try_dct16);
        overlay(&mut profile.try_dct32, try_dct32);
        overlay(&mut profile.try_dct64, try_dct64);
        overlay(&mut profile.try_dct4x8_afv, try_dct4x8_afv);
        overlay(&mut profile.fine_grained_step, fine_grained_step);
        overlay(&mut profile.k_info_loss_mul_base, k_info_loss_mul_base);
        overlay(&mut profile.entropy_mul_table, entropy_mul_table);
        overlay(&mut profile.cfl_two_pass, cfl_two_pass);
        overlay(&mut profile.chromacity_adjustment, chromacity_adjustment);
        overlay(&mut profile.patch_ref_tree_learning, patch_ref_tree_learning);
        overlay(&mut profile.non_aligned_eval, non_aligned_eval);
        overlay(
            &mut profile.enhanced_clustering_vardct,
            enhanced_clustering_vardct,
        );
        overlay(&mut profile.k_ac_quant, k_ac_quant);
    }
}
901
#[cfg(feature = "__expert")]
impl LosslessInternalParams {
    /// Overlay every populated (`Some`) override onto `profile`; fields left
    /// at `None` keep whatever value `profile` already holds.
    pub(crate) fn apply_to(self, profile: &mut EffortProfile) {
        // Write `requested` into `slot` only when an override was supplied.
        fn overlay<T>(slot: &mut T, requested: Option<T>) {
            if let Some(value) = requested {
                *slot = value;
            }
        }

        // Exhaustive destructuring: a field added to the struct but not
        // forwarded below becomes a compile error rather than a silent no-op.
        let Self {
            nb_rcts_to_try,
            wp_num_param_sets,
            tree_max_buckets,
            tree_num_properties,
            tree_threshold_base,
            tree_sample_fraction,
            tree_max_samples_fixed,
        } = self;

        overlay(&mut profile.nb_rcts_to_try, nb_rcts_to_try);
        overlay(&mut profile.wp_num_param_sets, wp_num_param_sets);
        overlay(&mut profile.tree_max_buckets, tree_max_buckets);
        overlay(&mut profile.tree_num_properties, tree_num_properties);
        overlay(&mut profile.tree_threshold_base, tree_threshold_base);
        overlay(&mut profile.tree_sample_fraction, tree_sample_fraction);
        overlay(&mut profile.tree_max_samples_fixed, tree_max_samples_fixed);
    }
}
938
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lossy_reference_e7() {
        let profile = EffortProfile::lossy(7, EncoderMode::Reference);
        assert_eq!(profile.effort, 7);

        // Feature gates that must be on at reference effort 7.
        let enabled = [
            ("use_ans", profile.use_ans),
            ("optimize_codes", profile.optimize_codes),
            ("custom_orders", profile.custom_orders),
            ("gaborish", profile.gaborish),
            ("pixel_domain_loss", profile.pixel_domain_loss),
            ("patches", profile.patches),
            ("ac_strategy_enabled", profile.ac_strategy_enabled),
            ("try_dct32", profile.try_dct32),
            ("try_dct64", profile.try_dct64),
            ("try_dct4x8_afv", profile.try_dct4x8_afv), // e6+
            ("non_aligned_eval", profile.non_aligned_eval),
            ("chromacity_adjustment", profile.chromacity_adjustment), // e7+
            ("epf_dynamic_sharpness", profile.epf_dynamic_sharpness), // e6+
            ("cfl_two_pass", profile.cfl_two_pass),                   // e7+
            ("cfl_newton", profile.cfl_newton), // e7+ with pass 2
            ("use_adaptive_quant", profile.use_adaptive_quant),
            ("adjust_quant_ac", profile.adjust_quant_ac),
        ];
        for &(name, flag) in &enabled {
            assert!(flag, "{} should be enabled", name);
        }

        // Feature gates that must still be off at reference effort 7.
        let disabled = [
            ("error_diffusion", profile.error_diffusion),
            // libjxl only enables LZ77 for VarDCT at e9+ (kTortoise)
            ("lz77", profile.lz77),
            // e9+
            (
                "enhanced_clustering_vardct",
                profile.enhanced_clustering_vardct,
            ),
            // e9+ (libjxl kNone at e<9)
            (
                "optimize_uint_configs_vardct",
                profile.optimize_uint_configs_vardct,
            ),
        ];
        for &(name, flag) in &disabled {
            assert!(!flag, "{} should be disabled", name);
        }

        // Numeric knobs.
        assert_eq!(profile.butteraugli_iters, 0); // libjxl gates at speed_tier <= kKitten (e8+)
        assert_eq!(profile.fine_grained_step, 2);
        assert_eq!(profile.initial_q_numerator, 0.39);
        assert_eq!(profile.k_favor_2x2, -0.4);
        assert_eq!(profile.k_ac_quant, 0.765);
        assert_eq!(profile.nb_rcts_to_try, 7);
        assert_eq!(profile.wp_num_param_sets, 0); // e8+
        assert_eq!(profile.tree_num_properties, 7);
        assert_eq!(profile.tree_max_buckets, 96);
    }

    #[test]
    fn test_lossy_reference_e5() {
        let profile = EffortProfile::lossy(5, EncoderMode::Reference);
        assert_eq!(profile.effort, 5);

        // Feature gates already on at reference effort 5.
        let enabled = [
            ("use_ans", profile.use_ans),
            ("gaborish", profile.gaborish),
            ("pixel_domain_loss", profile.pixel_domain_loss),
            ("ac_strategy_enabled", profile.ac_strategy_enabled),
            ("try_dct32", profile.try_dct32),
            ("use_adaptive_quant", profile.use_adaptive_quant),
            ("adjust_quant_ac", profile.adjust_quant_ac),
        ];
        for &(name, flag) in &enabled {
            assert!(flag, "{} should be enabled", name);
        }

        // Feature gates that only open at a higher effort.
        let disabled = [
            // off here, and still off at reference e7 (see the e7 test)
            ("error_diffusion", profile.error_diffusion),
            ("patches", profile.patches), // e7+
            ("lz77", profile.lz77),       // e9+ for VarDCT
            ("try_dct64", profile.try_dct64), // e7+
            ("try_dct4x8_afv", profile.try_dct4x8_afv), // e6+
            ("non_aligned_eval", profile.non_aligned_eval), // e6+
            ("chromacity_adjustment", profile.chromacity_adjustment), // e7+
            // e9+
            (
                "enhanced_clustering_vardct",
                profile.enhanced_clustering_vardct,
            ),
            // e9+
            (
                "optimize_uint_configs_vardct",
                profile.optimize_uint_configs_vardct,
            ),
            ("epf_dynamic_sharpness", profile.epf_dynamic_sharpness), // e6+
            ("cfl_two_pass", profile.cfl_two_pass),                   // e7+
            ("cfl_newton", profile.cfl_newton),                       // e7+
        ];
        for &(name, flag) in &disabled {
            assert!(!flag, "{} should be disabled", name);
        }

        // Numeric knobs.
        assert_eq!(profile.initial_q_numerator, 0.39);
        assert_eq!(profile.butteraugli_iters, 0); // libjxl gates at speed_tier <= kKitten (e8+)
        assert_eq!(profile.nb_rcts_to_try, 4);
        assert_eq!(profile.wp_num_param_sets, 0); // e8+
    }

    #[test]
    fn test_lossy_reference_e9() {
        let profile = EffortProfile::lossy(9, EncoderMode::Reference);

        // VarDCT LZ77 is enabled at e9+ (kTortoise).
        assert!(profile.lz77);
        assert_eq!(profile.lz77_method, Lz77Method::Optimal);

        assert_eq!(profile.butteraugli_iters, 4);
        assert_eq!(profile.fine_grained_step, 1);

        // Both of these open at e9+.
        assert!(profile.enhanced_clustering_vardct);
        assert!(profile.optimize_uint_configs_vardct);

        assert_eq!(profile.nb_rcts_to_try, 19);
        assert_eq!(profile.wp_num_param_sets, 5); // e9+
        assert_eq!(profile.tree_num_properties, 16);
        assert_eq!(profile.tree_max_buckets, 256);
    }

    #[test]
    fn test_lossy_reference_e8() {
        let profile = EffortProfile::lossy(8, EncoderMode::Reference);

        // libjxl only enables LZ77 for VarDCT at e9+.
        assert!(!profile.lz77);
        assert_eq!(profile.lz77_method, Lz77Method::Rle);

        assert_eq!(profile.butteraugli_iters, 2);
        assert_eq!(profile.fine_grained_step, 2);

        // Both of these stay off until e9+.
        assert!(!profile.enhanced_clustering_vardct);
        assert!(!profile.optimize_uint_configs_vardct);

        assert_eq!(profile.wp_num_param_sets, 2); // e8
    }

    #[test]
    fn test_lossy_reference_e3() {
        let profile = EffortProfile::lossy(3, EncoderMode::Reference);

        assert!(profile.use_ans);
        assert!(profile.optimize_codes);

        let disabled = [
            ("gaborish", profile.gaborish),
            ("ac_strategy_enabled", profile.ac_strategy_enabled),
            ("use_adaptive_quant", profile.use_adaptive_quant),
            ("adjust_quant_ac", profile.adjust_quant_ac),
        ];
        for &(name, flag) in &disabled {
            assert!(!flag, "{} should be disabled", name);
        }

        assert_eq!(profile.initial_q_numerator, 0.79);
    }

    #[test]
    fn test_lossless_reference_e7() {
        let profile = EffortProfile::lossless(7, EncoderMode::Reference);

        assert!(profile.use_ans);
        assert!(profile.tree_learning);
        assert!(profile.lz77);
        assert_eq!(profile.lz77_method, Lz77Method::Rle);
        assert!(profile.patches);

        // Lossy-only knobs are N/A for the lossless path and stay off.
        assert!(!profile.gaborish);
        assert!(!profile.pixel_domain_loss);
        assert!(!profile.ac_strategy_enabled);
    }

    #[test]
    fn test_lossless_reference_e4() {
        let profile = EffortProfile::lossless(4, EncoderMode::Reference);

        assert!(profile.use_ans);
        assert!(!profile.tree_learning); // e7+
        assert!(!profile.lz77); // e7+
        assert!(!profile.patches); // e5+
    }

    #[test]
    fn test_effort_clamp() {
        // Out-of-range efforts are pulled back into 1..=10.
        let too_low = EffortProfile::lossy(0, EncoderMode::Reference);
        assert_eq!(too_low.effort, 1);

        let too_high = EffortProfile::lossy(99, EncoderMode::Reference);
        assert_eq!(too_high.effort, 10);
    }

    #[test]
    fn test_experimental_diverges_from_reference() {
        // Experimental should share effort/feature-flag structure with reference
        for effort in 1..=10 {
            let reference = EffortProfile::lossy(effort, EncoderMode::Reference);
            let experimental = EffortProfile::lossy(effort, EncoderMode::Experimental);
            assert_eq!(reference.effort, experimental.effort);
            assert_eq!(reference.use_ans, experimental.use_ans);
            assert_eq!(reference.k_favor_2x2, experimental.k_favor_2x2);
            assert_eq!(reference.butteraugli_iters, experimental.butteraugli_iters);
            assert_eq!(reference.nb_rcts_to_try, experimental.nb_rcts_to_try);
        }

        // Verify specific divergences at effort 7
        let reference = EffortProfile::lossy(7, EncoderMode::Reference);
        let experimental = EffortProfile::lossy(7, EncoderMode::Experimental);

        // k_info_loss_mul_base: 1.2 → 1.3 (PR #4506)
        assert_eq!(reference.k_info_loss_mul_base, 1.2);
        assert_eq!(experimental.k_info_loss_mul_base, 1.3);

        // entropy_mul_table: PR #4506 rebalancing
        let ref_table = &reference.entropy_mul_table;
        let exp_table = &experimental.entropy_mul_table;
        assert_eq!(ref_table.dct4x4, 1.08);
        assert_eq!(exp_table.dct4x4, 0.88);
        assert_eq!(ref_table.identity, 1.0428);
        assert_eq!(exp_table.identity, 0.88);
        assert_eq!(ref_table.afv, 0.817_794_9);
        assert_eq!(exp_table.afv, 0.75);
        // Unchanged values should match
        assert_eq!(ref_table.dct8, exp_table.dct8);
        assert_eq!(ref_table.dct16x8, exp_table.dct16x8);
        assert_eq!(ref_table.dct32x32, exp_table.dct32x32);

        // enhanced_clustering_vardct: e9+ → e7+ in experimental
        assert!(!reference.enhanced_clustering_vardct); // reference e7: off
        assert!(experimental.enhanced_clustering_vardct); // experimental e7: on

        // patch_ref_tree_learning: false → true at e7+
        assert!(!reference.patch_ref_tree_learning);
        assert!(experimental.patch_ref_tree_learning);

        // At effort 5, experimental should NOT enable the e7+ features
        let experimental_e5 = EffortProfile::lossy(5, EncoderMode::Experimental);
        assert!(!experimental_e5.enhanced_clustering_vardct);
        assert!(!experimental_e5.patch_ref_tree_learning);
        // But should still have the entropy_mul and info_loss_mul changes
        assert_eq!(experimental_e5.k_info_loss_mul_base, 1.3);
        assert_eq!(experimental_e5.entropy_mul_table.dct4x4, 0.88);
    }

    #[test]
    fn test_entropy_mul_table_reference_values() {
        // Verify all reference values match libjxl enc_ac_strategy.cc:584
        let table = EntropyMulTable::reference();
        let expected = [
            ("dct8", table.dct8, 0.8),
            ("dct4x4", table.dct4x4, 1.08),
            ("dct4x8", table.dct4x8, 0.859_316_37),
            ("identity", table.identity, 1.0428),
            ("dct2x2", table.dct2x2, 0.95),
            ("afv", table.afv, 0.817_794_9),
            ("dct16x8", table.dct16x8, 1.21),
            ("dct16x16", table.dct16x16, 1.34),
            ("dct16x32", table.dct16x32, 1.49),
            ("dct32x32", table.dct32x32, 1.48),
            ("dct64x32", table.dct64x32, 2.25),
            ("dct64x64", table.dct64x64, 2.25),
        ];
        for &(name, actual, want) in &expected {
            assert_eq!(actual, want, "reference multiplier for {}", name);
        }
    }

    #[test]
    fn test_entropy_mul_table_experimental_values() {
        // Verify PR #4506 changes and that unchanged values are preserved
        let experimental = EntropyMulTable::experimental();
        let reference = EntropyMulTable::reference();

        // Changed values (PR #4506)
        assert_eq!(experimental.dct4x4, 0.88); // was 1.08
        assert_eq!(experimental.identity, 0.88); // was 1.0428
        assert_eq!(experimental.afv, 0.75); // was 0.818

        // Unchanged values
        let carried_over = [
            ("dct8", experimental.dct8, reference.dct8),
            ("dct4x8", experimental.dct4x8, reference.dct4x8),
            ("dct2x2", experimental.dct2x2, reference.dct2x2),
            ("dct16x8", experimental.dct16x8, reference.dct16x8),
            ("dct16x16", experimental.dct16x16, reference.dct16x16),
            ("dct16x32", experimental.dct16x32, reference.dct16x32),
            ("dct32x32", experimental.dct32x32, reference.dct32x32),
            ("dct64x32", experimental.dct64x32, reference.dct64x32),
            ("dct64x64", experimental.dct64x64, reference.dct64x64),
        ];
        for &(name, actual, want) in &carried_over {
            assert_eq!(actual, want, "{} should match the reference table", name);
        }
    }

    #[test]
    fn test_lossless_experimental_matches_reference() {
        // Lossless experimental is currently identical to reference
        for effort in 1..=10 {
            let reference = EffortProfile::lossless(effort, EncoderMode::Reference);
            let experimental = EffortProfile::lossless(effort, EncoderMode::Experimental);
            assert_eq!(reference.effort, experimental.effort);
            assert_eq!(reference.use_ans, experimental.use_ans);
            assert_eq!(reference.tree_learning, experimental.tree_learning);
            assert_eq!(reference.lz77, experimental.lz77);
        }
    }

    #[test]
    fn test_tree_threshold_base_formula() {
        // speed_tier = 10 - effort
        // threshold = 75 + 14 * speed_tier
        for &(effort, speed_tier) in &[(7, 3.0_f32), (9, 1.0), (5, 5.0)] {
            let profile = EffortProfile::lossy(effort, EncoderMode::Reference);
            assert_eq!(
                profile.tree_threshold_base,
                75.0 + 14.0 * speed_tier,
                "effort {}",
                effort
            );
        }
    }
}