Skip to main content

jxl_encoder/
effort.rs

1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Centralized effort-derived encoder decisions.
6//!
7//! Every effort-gated decision in the encoder reads from an [`EffortProfile`]
8//! instead of checking `if effort >= N` inline. Construct once from
9//! `(effort, mode)`, then pass to all subsystems.
10
11use crate::api::EncoderMode;
12use crate::entropy_coding::lz77::Lz77Method;
13
/// Per-strategy raw entropy multipliers for the AC strategy cost model.
///
/// These control the relative preference for each transform type in AC strategy
/// selection. Higher values penalize a strategy (making it less likely to be chosen);
/// lower values favor it. The 8x8-class values are normalized by DCT8's value before
/// use, so DCT8 always evaluates at 1.0. Larger transforms use raw values directly.
///
/// Default values match libjxl `enc_ac_strategy.cc:584` (`kTransforms8x8[i].entropy_mul`).
/// Experimental values from libjxl PR #4506 (Jon Sneyers, VarDCT cost tuning).
///
/// `#[non_exhaustive]` so future libjxl-side strategy additions can land
/// without a breaking change. Construct via [`Self::reference`] or
/// [`Self::experimental`] and mutate fields as needed.
///
/// Implements `PartialEq` so two tables (e.g. a tuned one and the reference)
/// can be compared directly. The comparison is plain `f32` equality per field.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub struct EntropyMulTable {
    /// DCT8 base value. All 8x8-class transforms are normalized by this.
    /// Reference: 0.8 (libjxl `enc_ac_strategy.cc:357`, `kTransforms8x8[0].entropy_mul`).
    pub dct8: f32,

    /// DCT4x4 (four 4x4 sub-blocks per 8x8 block).
    /// Reference: 1.08. Experimental: 0.88 (PR #4506, ~19% reduction).
    /// Lowering favors DCT4x4 for textured/detailed regions (screenshots, text).
    pub dct4x4: f32,

    /// DCT4x8 / DCT8x4 (half-block transforms for edges/detail).
    /// Reference: 0.859316 (libjxl `enc_ac_strategy.cc`).
    pub dct4x8: f32,

    /// Identity (pixel copy, no transform).
    /// Reference: 1.0428. Experimental: 0.88 (PR #4506, ~16% reduction).
    /// Lowering favors identity blocks for flat/noisy regions.
    pub identity: f32,

    /// DCT2x2 (2x2 Hadamard-like transform).
    /// Reference: 0.95 (libjxl `enc_ac_strategy.cc`).
    pub dct2x2: f32,

    /// AFV (Adaptive Frequency Variable, corner DCT).
    /// Reference: 0.818. Experimental: 0.75 (PR #4506, ~8% reduction).
    /// Lowering favors AFV for edge blocks with mixed content.
    pub afv: f32,

    /// DCT16x8 / DCT8x16 (larger transforms use raw values, not normalized by DCT8).
    /// Reference: 1.21 (libjxl `enc_ac_strategy.cc`).
    pub dct16x8: f32,

    /// DCT16x16.
    /// Reference: 1.34 (libjxl `enc_ac_strategy.cc`).
    pub dct16x16: f32,

    /// DCT16x32 / DCT32x16.
    /// Reference: 1.49 (libjxl `enc_ac_strategy.cc`).
    pub dct16x32: f32,

    /// DCT32x32.
    /// Reference: 1.48 (libjxl `enc_ac_strategy.cc`).
    pub dct32x32: f32,

    /// DCT64x32 / DCT32x64.
    /// Reference: 2.25 (libjxl `enc_ac_strategy.cc`).
    pub dct64x32: f32,

    /// DCT64x64.
    /// Reference: 2.25 (libjxl `enc_ac_strategy.cc`).
    pub dct64x64: f32,
}

impl EntropyMulTable {
    /// Default values matching libjxl `enc_ac_strategy.cc:584`.
    ///
    /// This is the baseline every other table in this file is derived from.
    pub fn reference() -> Self {
        Self {
            dct8: 0.8,
            dct4x4: 1.08,
            dct4x8: 0.859_316_37,
            identity: 1.0428,
            dct2x2: 0.95,
            afv: 0.817_794_9,
            dct16x8: 1.21,
            dct16x16: 1.34,
            dct16x32: 1.49,
            dct32x32: 1.48,
            dct64x32: 2.25,
            dct64x64: 2.25,
        }
    }

    /// Experimental values from libjxl PR #4506 (Jon Sneyers, VarDCT cost tuning).
    ///
    /// Changes vs reference:
    /// - dct4x4: 1.08 → 0.88 (~19% reduction) — favor detail-preserving 4x4 sub-blocks
    /// - identity: 1.0428 → 0.88 (~16% reduction) — favor pixel-copy for flat regions
    /// - afv: 0.818 → 0.75 (~8% reduction) — favor corner DCT for edge blocks
    ///
    /// Every field not listed above is inherited unchanged from [`Self::reference`].
    pub fn experimental() -> Self {
        Self {
            dct4x4: 0.88,
            identity: 0.88,
            afv: 0.75,
            // Struct-update syntax: the three overrides above take precedence,
            // the remaining nine multipliers come from the reference table.
            ..Self::reference()
        }
    }
}
116
117/// All effort-derived encoder decisions, centralized.
118///
119/// Replaces scattered `if effort >= N` checks throughout the codebase.
120/// Construct once from (effort, mode, encoding path), pass to all subsystems.
121///
122/// **Field categories**:
123/// - **Effort-derived**: changes value across effort levels (e.g., `nb_rcts_to_try`,
124///   `tree_max_buckets`, `butteraugli_iters`).
125/// - **Tuning constants**: same value at every effort in the reference profile,
126///   mode-dependent in experimental (e.g., `k_favor_2x2`, `k_info_loss_mul_base`,
127///   `entropy_mul_table`, `k8x8` etc.). The picker can dial these independently
128///   of effort.
129///
130/// `#[non_exhaustive]` so we can grow the field set as the picker discovers new
131/// useful knobs without breaking external `EffortProfile { ... }` constructions.
132/// Construct via [`Self::lossy`] or [`Self::lossless`] and mutate fields as needed.
133#[derive(Clone, Debug)]
134#[non_exhaustive]
135pub struct EffortProfile {
136    /// The raw effort level (1–10).
137    pub effort: u8,
138
139    // ─── Feature flags ───────────────────────────────────────────────────
140    /// Use ANS entropy coding instead of Huffman.
141    pub use_ans: bool,
142    /// Use two-pass mode with optimized entropy codes.
143    pub optimize_codes: bool,
144    /// Use custom coefficient ordering (AC scan order from statistics).
145    pub custom_orders: bool,
146    /// Enable gaborish inverse pre-filter.
147    pub gaborish: bool,
148    /// Enable pixel-domain loss in AC strategy selection.
149    pub pixel_domain_loss: bool,
150    /// Enable error diffusion in AC quantization.
151    pub error_diffusion: bool,
152    /// Enable patches/dictionary detection.
153    pub patches: bool,
154    /// Enable content-adaptive MA tree learning (modular path).
155    pub tree_learning: bool,
156    /// Enable LZ77 backward references in entropy coding.
157    pub lz77: bool,
158    /// LZ77 method when lz77 is enabled.
159    pub lz77_method: Lz77Method,
160    /// Number of butteraugli quantization loop iterations.
161    pub butteraugli_iters: u32,
162
163    // ─── AC strategy search ──────────────────────────────────────────────
164    /// Enable adaptive AC strategy selection (multi-block transforms).
165    pub ac_strategy_enabled: bool,
166    /// Try DCT16x16/DCT16x8/DCT8x16 transforms (multi-block 16x16 merges).
167    pub try_dct16: bool,
168    /// Try DCT32x32/DCT32x16/DCT16x32 transforms.
169    pub try_dct32: bool,
170    /// Try DCT64x64/DCT64x32/DCT32x64 transforms.
171    pub try_dct64: bool,
172    /// Try DCT4x8/DCT8x4/DCT4x4/AFV transforms (effort >= 6 in libjxl).
173    pub try_dct4x8_afv: bool,
174    /// Enable non-aligned evaluation pass (odd-aligned 16x16 regions).
175    pub non_aligned_eval: bool,
176    /// Step size for fine-grained AC strategy search on 32x32+ blocks.
177    /// 1 = every position (effort 9+), 2 = every other (default).
178    pub fine_grained_step: u8,
179
180    // ─── VarDCT pipeline options ──────────────────────────────────────────
181    /// Apply pixel-level chromacity adjustments (effort >= 7 in libjxl).
182    pub chromacity_adjustment: bool,
183    /// Use pair-merge clustering for VarDCT entropy codes (effort >= 9 in libjxl).
184    /// When false, uses fast k-means-only clustering.
185    pub enhanced_clustering_vardct: bool,
186    /// Optimize per-histogram HybridUint configs for VarDCT entropy codes.
187    /// libjxl uses uint_method=kNone (no optimization, default {4,2,0}) at effort < 9.
188    /// The fast optimization picks non-default configs whose signaling overhead
189    /// exceeds their coding benefit on VarDCT token distributions.
190    pub optimize_uint_configs_vardct: bool,
191    /// Compute per-block dynamic EPF sharpness (effort >= 6 in libjxl).
192    pub epf_dynamic_sharpness: bool,
193    /// Recompute CfL map after initial quantization for better estimates (effort >= 7 in libjxl).
194    pub cfl_two_pass: bool,
195    /// Use Newton's method (perceptual cost model) for CfL fitting (effort >= 7 in libjxl).
196    /// When false, uses fast least-squares fitting (quadratic cost, single-pass).
197    pub cfl_newton: bool,
198    /// Newton finite-difference epsilon for CfL fitting.
199    /// Controls second-derivative accuracy. Default 1.0 (libjxl uses 100.0, which oscillates).
200    pub cfl_newton_eps: f32,
201    /// Maximum Newton iterations for CfL fitting. Default 10 (libjxl uses 20).
202    pub cfl_newton_max_iters: usize,
203
204    // ─── Quantization ────────────────────────────────────────────────────
205    /// Use adaptive (content-dependent) quant field via InitialQuantField.
206    /// When false (effort < 5), uses flat quant field = 0.79/distance.
207    /// Matches libjxl enc_heuristics.cc:1097-1128.
208    pub use_adaptive_quant: bool,
209    /// Enable per-block AdjustQuantBlockAC (effort >= 5 in libjxl).
210    pub adjust_quant_ac: bool,
211    /// Numerator for the effort-fixed q parameter used in global_scale computation.
212    /// libjxl: 0.39 at effort >= 5, 0.79 at effort < 5.
213    /// global_scale = 65536 * (initial_q_numerator / distance) / 5.0
214    pub initial_q_numerator: f32,
215    /// Fixed quantization thresholds applied per-coefficient on the Y channel
216    /// when [`Self::adjust_quant_ac`] is `false`.
217    ///
218    /// Pipeline stage: VarDCT post-DCT quantization (`vardct/transform.rs`).
219    /// The four entries gate progressively higher coefficient bands; values
220    /// below the threshold round to zero.
221    /// From libjxl `enc_group.cc:358` (`kThresholdMul` constants for low-effort path).
222    /// Lowering the entries preserves more high-frequency Y detail at the cost
223    /// of bitrate; raising flattens texture. Override when an asset class needs
224    /// different texture-vs-bitrate balance than the libjxl defaults give.
225    pub fixed_thresholds_y: [f32; 4],
226    /// Initial quantization thresholds used when [`Self::adjust_quant_ac`] is
227    /// `true` (effort >= 5). Per-block adjustment iterates from these.
228    /// From libjxl `enc_group.cc:390`.
229    /// Pipeline stage: VarDCT post-DCT quantization, prior to the
230    /// `AdjustQuantBlockAC` per-block tweak. Useful as a starting point for
231    /// pickers exploring the threshold-vs-rate frontier per content class.
232    pub adjust_thresholds: [f32; 4],
233
234    // ─── Cost model constants ────────────────────────────────────────────
235    // All five `k_*` constants below feed `vardct/ac_strategy_search.rs`
236    // (the per-8×8 cost evaluator that picks DCT8 vs DCT4x4 vs IDENTITY vs
237    // larger merges). Default values come from libjxl's reference encoder
238    // and are *the same at every effort level* — they describe the cost
239    // model itself, not the search depth. The picker / sweep harness uses
240    // them to retune the model per content class without touching effort.
241    /// kFavor2X2AtHighQuality weight (-0.4 in libjxl,
242    /// `enc_ac_strategy.cc::kFavor2X2AtHighQuality`).
243    /// Applied as `k_favor_2x2 * ((5-distance)/5)^2` to IDENTITY/DCT2X2
244    /// entropy at distance < 5. More-negative values aggressively favor
245    /// pixel-copy / 2×2 blocks at low distances; useful for screenshots /
246    /// pixel art where the default photo-tuned bias under-uses IDENTITY.
247    pub k_favor_2x2: f32,
248    /// Base penalty added to every non-DCT8 strategy's cost
249    /// (libjxl `kAvoidEntropyOfTransforms = 0.5`,
250    /// `enc_ac_strategy.cc::EvalAcStrategy`). Higher values discourage the
251    /// AC strategy search from leaving DCT8; lower values let it spread to
252    /// IDENTITY / DCT4x4 / DCT16x16 more freely.
253    pub k_avoid_transforms_base: f32,
254    /// Base multiplier on the IDCT-domain (pixel-domain) error term in
255    /// `EstimateEntropy` (libjxl 1.2, `enc_ac_strategy.cc`).
256    /// PR #4506 raised this to 1.3 for the experimental profile — heavier
257    /// weight on visible artifacts vs coefficient-domain entropy.
258    pub k_info_loss_mul_base: f32,
259    /// Base multiplier on the zero-coefficient cost term (libjxl 9.309,
260    /// `enc_ac_strategy.cc`). Increasing rewards strategies that leave
261    /// many coefficients exactly zero (boosts large-DCT use on smooth
262    /// regions). Lowering lets non-zero residuals stay cheaper.
263    pub k_zeros_mul_base: f32,
264    /// Base delta added inside the cost-model interpolation (libjxl 10.833,
265    /// `enc_ac_strategy.cc`). Acts as an "exchange rate" between rate
266    /// (entropy proxy) and distortion (info-loss term); rarely retuned
267    /// outside picker/sweep work.
268    pub k_cost_delta_base: f32,
269    /// Quantization-cost constant used when materializing the initial
270    /// quant field (libjxl 0.765, `enc_adaptive_quantization.cc`). Read by
271    /// `vardct/precomputed.rs` and `vardct/encoder.rs`. Lower values
272    /// produce a coarser initial field (less rate, more distortion);
273    /// higher refines.
274    pub k_ac_quant: f32,
275
276    // ─── Coefficient-domain multiplier constants ─────────────────────────
277    // Each tuple is `(mul1, mul2, base)` for the EstimateEntropy /
278    // info-loss formula in `vardct/ac_strategy_search.rs`. `mul1` weights
279    // the negative log-rate term, `mul2` weights the AC magnitude term,
280    // and `base` is added unconditionally. Defaults come from libjxl's
281    // `enc_ac_strategy.cc`. Mode-/effort-independent in both reference
282    // and experimental — cost-model knobs the picker can dial.
283    /// DCT8x8 coefficient-domain multiplier `(mul1, mul2, base)`.
284    /// Note: stored values include libjxl's 0.75 factor on `mul1`/`mul2`
285    /// (applied at `enc_ac_strategy.cc:790` for 8×8-class transforms).
286    pub k8x8: (f32, f32, f32),
287    /// DCT16x8 / DCT8x16 coefficient-domain multiplier `(mul1, mul2, base)`.
288    /// Larger transforms skip the 0.75 factor and use the libjxl raw values.
289    pub k16x8: (f32, f32, f32),
290    /// DCT16x16 coefficient-domain multiplier `(mul1, mul2, base)`.
291    pub k16x16: (f32, f32, f32),
292    /// DCT4x8 / DCT8x4 coefficient-domain multiplier `(mul1, mul2, base)`.
293    /// 4×N strategies share the 0.75 factor with 8×8.
294    pub k4x8: (f32, f32, f32),
295    /// DCT4x4 coefficient-domain multiplier `(mul1, mul2, base)`.
296    /// 4×4 strategies share the 0.75 factor with 8×8.
297    pub k4x4: (f32, f32, f32),
298
299    // ─── Entropy multiplier table ──────────────────────────────────────────
300    /// Per-strategy entropy multipliers for AC strategy cost model.
301    /// Controls relative preference for each transform type.
302    pub entropy_mul_table: EntropyMulTable,
303
304    // ─── Patch encoding ────────────────────────────────────────────────────
305    /// Use tree learning for patch reference frame encoding.
306    /// When true AND ref frame is large enough (>= 128×128), enables adaptive
307    /// prediction in the modular encoder for patch ref frames.
308    /// Reference: false (libjxl uses simple Gradient predictor).
309    /// Experimental: true at effort >= 7 (PR #4533 style improvement).
310    pub patch_ref_tree_learning: bool,
311
312    // ─── RCT selection ───────────────────────────────────────────────────
313    /// Number of Reversible Color Transform variants to evaluate before
314    /// committing to one (0 = skip search, use YCoCg unconditionally).
315    ///
316    /// Pipeline stage: modular pre-transform, before predictor + tree
317    /// learning (`modular/encode.rs::select_best_rct`,
318    /// `modular/frame.rs::select_best_rct_at`). Each candidate runs a
319    /// cost estimate; the cheapest wins.
320    /// Effort interaction: 0 at e<5, 4 at e5, 5 at e6, 7 at e7, 9 at e8,
321    /// 19 at e9+ (libjxl `kSquirrel`/`kKitten`/`kTortoise` schedule).
322    /// Override when a specific content class (e.g., film stills) has a
323    /// known-best RCT and the search is wasted compute, or when sweeping
324    /// to discover content-specific defaults.
325    pub nb_rcts_to_try: u8,
326
327    // ─── WP parameter search ───────────────────────────────────────────────
328    /// Number of weighted-predictor parameter sets to try when tuning the
329    /// modular WP per channel (0 = use the libjxl default parameters
330    /// without searching).
331    ///
332    /// Pipeline stage: modular predictor selection
333    /// (`modular/predictor.rs::find_best_wp_params`, called from
334    /// `modular/section.rs`, `modular/frame.rs`, `modular/encode.rs`).
335    /// Effort interaction: 0 at e<8, 2 at e8, 5 at e9+. The search is
336    /// expensive (each candidate runs a cost estimate over all WP-eligible
337    /// channels), which is why libjxl gates it behind `kKitten`/`kTortoise`.
338    /// Override to force the search on at lower effort (e.g., when a picker
339    /// wants e6-quality bytes with WP-fitted parameters), or off at e9 for
340    /// faster sweeps.
341    pub wp_num_param_sets: u8,
342
343    // ─── Tree learning parameters ────────────────────────────────────────
344    // Read by `modular/tree_learn.rs::TreeLearningParams::from_profile`.
345    // These describe the *shape* of the MA tree — wider trees split on
346    // more properties / finer buckets, deeper trees use lower thresholds,
347    // and the sampling caps trade tree-learning compute for accuracy.
348    /// Number of MA-tree decision properties to evaluate per split.
349    /// Capped to the order length defined in `modular/tree_learn.rs`
350    /// (15 without `group_id`, 16 with).
351    /// Effort interaction: 3 at e<=4, 4 at e5, 5 at e6, 7 at e7, 10 at e8,
352    /// 16 at e9+. More properties = better trees but quadratic cost in
353    /// `LearnTree`. Override to retune the speed/quality knee per content.
354    pub tree_num_properties: u8,
355    /// Maximum number of quantization buckets per property when building
356    /// the histogram for tree splits. Matches libjxl
357    /// `enc_modular.cc:556-590` `max_property_values` per speed tier.
358    /// Effort interaction: 32 at e<=4, 48 at e5, 64 at e6, 96 at e7,
359    /// 128 at e8, 256 at e9+. Higher = finer thresholds at higher learning
360    /// cost. Override when a corpus benefits from coarser/finer splits
361    /// than the libjxl tier table predicts.
362    pub tree_max_buckets: u16,
363    /// Base entropy-cost threshold a candidate split must beat to be
364    /// accepted (libjxl `75 + 14 * speed_tier` in
365    /// `enc_modular.cc::LearnTreeHeuristics`).
366    /// Effort interaction: 173 at e<=1 (speed_tier=9), 117 at e5 (5),
367    /// 75 at e9+ (1). Lower threshold = more splits = larger tree. Override
368    /// to bias the tree shallower (cheaper decode) or deeper (better fit).
369    pub tree_threshold_base: f32,
370    /// Hard cap on samples drawn for tree learning when set; `0` defers
371    /// to [`Self::tree_sample_fraction`].
372    /// Read by `modular/tree_learn.rs::sample_count_for_profile`.
373    /// Effort interaction: 65,000 at e<=4 (cheap, fixed budget), 0 at e>=5
374    /// (let the fraction-based path scale with image size). Override to
375    /// fix the tree-learning compute regardless of input pixels.
376    pub tree_max_samples_fixed: u32,
377    /// Fraction of total pixels to sample for tree learning when
378    /// [`Self::tree_max_samples_fixed`] is `0`. Floor of 65,536 samples.
379    /// Read by `modular/tree_learn.rs::sample_count_for_profile`.
380    /// Effort interaction: 0.15 at e<=4, 0.25 at e5, 0.35 at e6, 0.5 at e7,
381    /// 0.55 at e8, 0.65 at e9+ (libjxl PR #4236). Higher fractions improve
382    /// tree fit (especially on large images) at proportional cost. Override
383    /// to densify sampling on large images at moderate effort, or thin
384    /// sampling for fast sweeps at high effort.
385    pub tree_sample_fraction: f32,
386}
387
388impl EffortProfile {
389    /// Create an effort profile for lossy (VarDCT) encoding.
390    pub fn lossy(effort: u8, mode: EncoderMode) -> Self {
391        let effort = effort.clamp(1, 10);
392        match mode {
393            EncoderMode::Reference => Self::lossy_reference(effort),
394            EncoderMode::Experimental => Self::lossy_experimental(effort),
395        }
396    }
397
398    /// Create an effort profile for lossless (modular) encoding.
399    pub fn lossless(effort: u8, mode: EncoderMode) -> Self {
400        let effort = effort.clamp(1, 10);
401        match mode {
402            EncoderMode::Reference => Self::lossless_reference(effort),
403            EncoderMode::Experimental => Self::lossless_experimental(effort),
404        }
405    }
406
407    fn lossy_reference(effort: u8) -> Self {
408        let speed_tier = 10u8.saturating_sub(effort);
409
410        Self {
411            effort,
412
413            // ── Feature flags ──
414            use_ans: effort >= 3,
415            optimize_codes: effort >= 3,
416            custom_orders: effort >= 4,
417            gaborish: effort >= 5,
418            pixel_domain_loss: effort >= 5,
419            error_diffusion: false, // libjxl accepts param but never uses it
420            patches: effort >= 7,
421            tree_learning: effort >= 7,
422            // libjxl does NOT use LZ77 for VarDCT DC or AC at effort < 9.
423            // DC: ForModular() → lz77_method = kNone (modular_mode=false).
424            // AC: HistogramParams(kSquirrel, num_ctx) → lz77_method = kNone
425            //     (enc_frame.cc overrides since tier > kTortoise).
426            // Only kTortoise (effort 9+) enables LZ77 for VarDCT streams.
427            lz77: effort >= 9,
428            lz77_method: match effort {
429                0..=8 => Lz77Method::Rle,
430                _ => Lz77Method::Optimal,
431            },
432            butteraugli_iters: match effort {
433                // libjxl runs FindBestQuantization unconditionally for lossy
434                // encoding. Gated at speed_tier <= kKitten (effort >= 8) in libjxl
435                // (enc_adaptive_quantization.cc:1282). kDefaultButteraugliIters=2,
436                // kMaxButteraugliIters=4 for kTortoise (effort 9+).
437                0..=7 => 0,
438                8 => 2,
439                _ => 4,
440            },
441
442            // ── AC strategy search ──
443            ac_strategy_enabled: effort >= 5,
444            try_dct16: effort >= 5,
445            try_dct32: effort >= 5,
446            try_dct64: effort >= 7,
447            try_dct4x8_afv: effort >= 6,
448            non_aligned_eval: effort >= 6,
449            fine_grained_step: if effort >= 9 { 1 } else { 2 },
450
451            // ── VarDCT pipeline ──
452            chromacity_adjustment: effort >= 7,
453            enhanced_clustering_vardct: effort >= 9,
454            optimize_uint_configs_vardct: effort >= 9,
455            epf_dynamic_sharpness: effort >= 6,
456            cfl_two_pass: effort >= 7,
457            cfl_newton: effort >= 7,
458            cfl_newton_eps: jxl_simd::NEWTON_EPS_DEFAULT,
459            cfl_newton_max_iters: jxl_simd::NEWTON_MAX_ITERS_DEFAULT,
460
461            // ── Quantization ──
462            use_adaptive_quant: effort >= 5,
463            adjust_quant_ac: effort >= 5,
464            initial_q_numerator: if effort >= 5 { 0.39 } else { 0.79 },
465            fixed_thresholds_y: [0.56, 0.62, 0.62, 0.62],
466            adjust_thresholds: [0.58, 0.64, 0.64, 0.64],
467
468            // ── Cost model constants (from libjxl) ──
469            k_favor_2x2: -0.4,
470            k_avoid_transforms_base: 0.5,
471            k_info_loss_mul_base: 1.2,
472            k_zeros_mul_base: 9.308_906,
473            k_cost_delta_base: 10.833_273,
474            k_ac_quant: 0.765,
475
476            // ── Coefficient-domain multipliers ──
477            // Note: k8x8 mul1 has 0.75 factor applied (libjxl enc_ac_strategy.cc:790)
478            k8x8: (-0.55 * 0.75, 1.073_575_8 * 0.75, 1.4),
479            k16x8: (-0.55, 0.901_958_8, 1.6),
480            k16x16: (-0.65, 0.88, 1.8),
481            k4x8: (-0.50 * 0.75, 0.88, 1.3),
482            k4x4: (-0.45 * 0.75, 0.85, 1.2),
483
484            // ── Entropy multiplier table ──
485            entropy_mul_table: EntropyMulTable::reference(),
486
487            // ── Patch encoding ──
488            patch_ref_tree_learning: false,
489
490            // ── RCT selection ──
491            nb_rcts_to_try: match effort {
492                0..=4 => 0,
493                5 => 4,
494                6 => 5,
495                7 => 7,
496                8 => 9,
497                _ => 19,
498            },
499
500            // ── WP parameter search ──
501            wp_num_param_sets: match effort {
502                0..=7 => 0,
503                8 => 2,
504                _ => 5,
505            },
506
507            // ── Tree learning ──
508            tree_num_properties: Self::tree_num_properties_for(effort),
509            tree_max_buckets: Self::tree_max_buckets_for(effort),
510            tree_threshold_base: 75.0 + 14.0 * speed_tier as f32,
511            tree_max_samples_fixed: if effort <= 4 { 65_000 } else { 0 },
512            // Effort-scaled nb_repeats matching libjxl PR #4236
513            tree_sample_fraction: Self::tree_sample_fraction_for(effort),
514        }
515    }
516
517    fn lossless_reference(effort: u8) -> Self {
518        let speed_tier = 10u8.saturating_sub(effort);
519
520        Self {
521            effort,
522
523            // ── Feature flags ──
524            use_ans: effort >= 3,
525            optimize_codes: effort >= 2,
526            custom_orders: effort >= 3,
527            gaborish: false,          // N/A for lossless
528            pixel_domain_loss: false, // N/A for lossless
529            error_diffusion: false,   // N/A for lossless
530            patches: effort >= 5,
531            tree_learning: effort >= 7,
532            lz77: effort >= 7,
533            lz77_method: match effort {
534                0..=7 => Lz77Method::Rle,
535                8 => Lz77Method::Greedy,
536                _ => Lz77Method::Optimal,
537            },
538            butteraugli_iters: 0, // N/A for lossless
539
540            // ── AC strategy (N/A for lossless) ──
541            ac_strategy_enabled: false,
542            try_dct16: false,
543            try_dct32: false,
544            try_dct64: false,
545            try_dct4x8_afv: false,
546            non_aligned_eval: false,
547            fine_grained_step: 2,
548
549            // ── VarDCT pipeline (N/A for lossless) ──
550            chromacity_adjustment: false,
551            enhanced_clustering_vardct: false,
552            optimize_uint_configs_vardct: false, // N/A for lossless
553            epf_dynamic_sharpness: false,
554            cfl_two_pass: false,
555            cfl_newton: false,
556            cfl_newton_eps: jxl_simd::NEWTON_EPS_DEFAULT,
557            cfl_newton_max_iters: jxl_simd::NEWTON_MAX_ITERS_DEFAULT,
558
559            // ── Quantization (N/A for lossless) ──
560            use_adaptive_quant: false,
561            adjust_quant_ac: false,
562            initial_q_numerator: 0.39,
563            fixed_thresholds_y: [0.56, 0.62, 0.62, 0.62],
564            adjust_thresholds: [0.58, 0.64, 0.64, 0.64],
565
566            // ── Cost model constants (used for tree learning cost estimates) ──
567            k_favor_2x2: -0.4,
568            k_avoid_transforms_base: 0.5,
569            k_info_loss_mul_base: 1.2,
570            k_zeros_mul_base: 9.308_906,
571            k_cost_delta_base: 10.833_273,
572            k_ac_quant: 0.765,
573
574            // ── Coefficient-domain multipliers (N/A for lossless) ──
575            k8x8: (-0.55 * 0.75, 1.073_575_8 * 0.75, 1.4),
576            k16x8: (-0.55, 0.901_958_8, 1.6),
577            k16x16: (-0.65, 0.88, 1.8),
578            k4x8: (-0.50 * 0.75, 0.88, 1.3),
579            k4x4: (-0.45 * 0.75, 0.85, 1.2),
580
581            // ── Entropy multiplier table (N/A for lossless, but struct requires it) ──
582            entropy_mul_table: EntropyMulTable::reference(),
583
584            // ── Patch encoding ──
585            patch_ref_tree_learning: false,
586
587            // ── RCT selection ──
588            nb_rcts_to_try: match effort {
589                0..=4 => 0,
590                5 => 4,
591                6 => 5,
592                7 => 7,
593                8 => 9,
594                _ => 19,
595            },
596
597            // ── WP parameter search ──
598            wp_num_param_sets: match effort {
599                0..=7 => 0,
600                8 => 2,
601                _ => 5,
602            },
603
604            // ── Tree learning ──
605            tree_num_properties: Self::tree_num_properties_for(effort),
606            tree_max_buckets: Self::tree_max_buckets_for(effort),
607            tree_threshold_base: 75.0 + 14.0 * speed_tier as f32,
608            tree_max_samples_fixed: if effort <= 4 { 65_000 } else { 0 },
609            // Effort-scaled nb_repeats matching libjxl PR #4236
610            tree_sample_fraction: Self::tree_sample_fraction_for(effort),
611        }
612    }
613
614    /// Experimental lossy profile with tuning from libjxl PRs and our own improvements.
615    ///
616    /// Divergences from reference (documented per-field):
617    /// - `k_info_loss_mul_base`: 1.2 → 1.3 (PR #4506, +8% pixel-domain loss weight)
618    /// - `entropy_mul_table`: PR #4506 values (favor DCT4x4, Identity, AFV)
619    /// - `enhanced_clustering_vardct`: enabled at effort >= 7 (was e9+)
620    /// - `patch_ref_tree_learning`: true at effort >= 7 (tree learning for patch ref frames)
621    fn lossy_experimental(effort: u8) -> Self {
622        let mut p = Self::lossy_reference(effort);
623
624        // PR #4506 (Jon Sneyers): +8% weight on pixel-domain loss improves visual quality
625        // on detailed content. The info_loss_mul scales the IDCT-domain error term in
626        // EstimateEntropy, making the cost model more sensitive to visible artifacts.
627        // Reference: 1.2 (libjxl enc_ac_strategy.cc). Experimental: 1.3.
628        p.k_info_loss_mul_base = 1.3;
629
630        // PR #4506 entropy multiplier rebalancing: favor small/detail-preserving transforms.
631        p.entropy_mul_table = EntropyMulTable::experimental();
632
633        // Pair-merge histogram clustering helps VarDCT at effort 7+ (not just e9+).
634        // The ANS header cost savings from merging similar distributions outweigh the
635        // slight data cost increase from sharing code tables across contexts.
636        if effort >= 7 {
637            p.enhanced_clustering_vardct = true;
638        }
639
640        // Tree learning for patch reference frames: adapts prediction to packed glyphs
641        // instead of using fixed Gradient predictor. Significant on large ref frames
642        // (screenshots with many unique patterns). Gated at effort >= 7.
643        if effort >= 7 {
644            p.patch_ref_tree_learning = true;
645        }
646
647        p
648    }
649
650    fn lossless_experimental(effort: u8) -> Self {
651        Self::lossless_reference(effort)
652    }
653
654    fn tree_num_properties_for(effort: u8) -> u8 {
655        match effort {
656            0..=4 => 3,
657            5 => 4,
658            6 => 5,
659            7 => 7,
660            8 => 10,
661            // 16 = all properties including group_id.
662            // Non-squeeze array has 15 elements, so .min(15) caps correctly.
663            // Squeeze array has 16 elements (group_id always included).
664            _ => 16,
665        }
666    }
667
668    /// Effort-scaled pixel sampling fraction for tree learning (libjxl PR #4236).
669    fn tree_sample_fraction_for(effort: u8) -> f32 {
670        match effort {
671            0..=4 => 0.15,
672            5 => 0.25,
673            6 => 0.35,
674            7 => 0.5,
675            8 => 0.55,
676            _ => 0.65,
677        }
678    }
679
680    fn tree_max_buckets_for(effort: u8) -> u16 {
681        // Matches libjxl enc_modular.cc:556-590 max_property_values by speed_tier.
682        match effort {
683            0..=4 => 32, // <=Cheetah
684            5 => 48,     // Hare
685            6 => 64,     // Wombat
686            7 => 96,     // Squirrel
687            8 => 128,    // Kitten
688            _ => 256,    // Tortoise
689        }
690    }
691}
692
693// ─────────────────────────────────────────────────────────────────────────
694// Public expert surface — segmented Lossy / Lossless internal-param structs
695// ─────────────────────────────────────────────────────────────────────────
696//
697// `LossyInternalParams` and `LosslessInternalParams` are the public picker /
698// sweep escape hatch (gated behind `__expert`). They split the internal
699// [`EffortProfile`] into two type-disjoint surfaces — one per encode mode —
700// so callers cannot accidentally hand the lossy encoder a knob that only
701// affects modular output, and vice-versa. The type system enforces
702// mode-correctness instead of relying on documentation.
703//
704// Each `Some(_)` field overrides the corresponding `EffortProfile` field
705// the lossy / lossless code path actually reads. Fields left at `None` keep
706// the (effort, mode)-derived default. This matches the segmented
707// `InternalParams` pattern used by zenavif / zenwebp / zenravif.
708
/// Picker / sweep override knobs for the **lossy (VarDCT)** encode path.
///
/// Apply via [`crate::api::LossyConfig::with_internal_params`]. Fields are
/// optional: `Some(value)` overrides the corresponding effort-derived
/// default; `None` keeps the default. `#[non_exhaustive]` so additional
/// knobs can land additively without a breaking change.
///
/// Every field is `None` in the derived [`Default`] value, so a default
/// instance overrides nothing. Because the struct is `#[non_exhaustive]`,
/// code outside this crate cannot use a struct literal: start from
/// `LossyInternalParams::default()` and assign the fields of interest.
///
/// The fields here are the lossy-side knobs that flow through `profile.X`
/// at lossy encode time (verified against `vardct/encoder.rs`,
/// `vardct/ac_strategy_search.rs`, `vardct/transform.rs`,
/// `vardct/precomputed.rs`, and `vardct/bitstream.rs`). Modular-only knobs
/// (RCT search, WP parameter scan, tree-learning shape) live on
/// [`LosslessInternalParams`] — VarDCT's DC frame uses a fixed Gradient
/// predictor, so those knobs do not affect lossy bytes.
#[cfg(feature = "__expert")]
#[non_exhaustive]
#[derive(Default, Clone, Debug)]
pub struct LossyInternalParams {
    /// Try DCT16x16 / DCT16x8 / DCT8x16 transforms in AC strategy search.
    /// Default at effort 7: `true`. Disabling forces no 16×16-class merges.
    pub try_dct16: Option<bool>,

    /// Try DCT32x32 / DCT32x16 / DCT16x32 transforms.
    /// Default at effort 7: `true`. Disabling forces no 32×32-class merges.
    pub try_dct32: Option<bool>,

    /// Try DCT64x64 / DCT64x32 / DCT32x64 transforms.
    /// Default at effort 7: `true`. Disabling forces no 64×64-class merges.
    pub try_dct64: Option<bool>,

    /// Try DCT4x8 / DCT8x4 / DCT4x4 / AFV transforms.
    /// Default at effort 6+: `true`. Disabling forces 8×8-or-larger only.
    pub try_dct4x8_afv: Option<bool>,

    /// Step size for fine-grained AC strategy search on 32×32+ blocks.
    /// `1` evaluates every position (effort 9+), `2` every other (default).
    pub fine_grained_step: Option<u8>,

    /// Base multiplier on the IDCT-domain (pixel-domain) error term in
    /// `EstimateEntropy`. Reference: 1.2 (libjxl). Experimental: 1.3
    /// (PR #4506). Higher values weight visible artifacts more heavily
    /// vs coefficient-domain entropy.
    pub k_info_loss_mul_base: Option<f32>,

    /// Per-strategy entropy multipliers for AC strategy cost model.
    /// Controls relative preference for each transform type.
    /// The override replaces the whole table, not individual entries.
    pub entropy_mul_table: Option<EntropyMulTable>,

    /// Recompute CfL map after initial quantization for better estimates.
    /// Default at effort 7+: `true`.
    pub cfl_two_pass: Option<bool>,

    /// Apply pixel-level chromacity adjustments. Default at effort 7+:
    /// `true`. Disabling skips per-pixel chromacity nudges.
    pub chromacity_adjustment: Option<bool>,

    /// Use tree learning for patch reference frame encoding instead of the
    /// fixed Gradient predictor. Reference: `false`. Experimental at
    /// effort 7+: `true`. Significant on screenshots / packed glyph patches.
    pub patch_ref_tree_learning: Option<bool>,

    /// Enable non-aligned evaluation pass (odd-aligned 16×16 regions) in
    /// AC strategy search. Default at effort 6+: `true`. Disabling halves
    /// the search depth.
    pub non_aligned_eval: Option<bool>,

    /// Use pair-merge clustering for VarDCT entropy codes. Reference at
    /// effort 9+: `true`; experimental at effort 7+: `true`. When `false`,
    /// uses fast k-means-only clustering (cheaper, slightly larger codes).
    pub enhanced_clustering_vardct: Option<bool>,

    /// Quantization-cost constant used when materializing the initial
    /// quant field. Reference value: 0.765 (libjxl
    /// `enc_adaptive_quantization.cc`). Lower values produce a coarser
    /// initial field (less rate, more distortion); higher values refine.
    pub k_ac_quant: Option<f32>,
}
786
/// Picker / sweep override knobs for the **lossless (modular)** encode path.
///
/// Apply via [`crate::api::LosslessConfig::with_internal_params`]. Fields
/// are optional: `Some(value)` overrides the corresponding effort-derived
/// default; `None` keeps the default. `#[non_exhaustive]` so additional
/// knobs can land additively without a breaking change.
///
/// Every field is `None` in the derived [`Default`] value, so a default
/// instance overrides nothing. Because the struct is `#[non_exhaustive]`,
/// code outside this crate cannot use a struct literal: start from
/// `LosslessInternalParams::default()` and assign the fields of interest.
///
/// The fields here are the modular-path knobs that flow through `profile.X`
/// in `modular/encode.rs`, `modular/frame.rs`, `modular/section.rs`,
/// `modular/predictor.rs`, and `modular/tree_learn.rs`. AC-strategy and
/// CfL knobs live on [`LossyInternalParams`].
#[cfg(feature = "__expert")]
#[non_exhaustive]
#[derive(Default, Clone, Debug)]
pub struct LosslessInternalParams {
    /// Number of Reversible Color Transform variants to evaluate before
    /// committing (0 = skip search, use YCoCg unconditionally).
    /// Effort interaction: 0 at e<5, 4 at e5, 5 at e6, 7 at e7, 9 at e8,
    /// 19 at e9+ (libjxl `kSquirrel`/`kKitten`/`kTortoise` schedule).
    pub nb_rcts_to_try: Option<u8>,

    /// Number of weighted-predictor parameter sets to try per WP-eligible
    /// channel (0 = use libjxl defaults without searching).
    /// Effort interaction: 0 at e<8, 2 at e8, 5 at e9+.
    pub wp_num_param_sets: Option<u8>,

    /// Maximum quantization buckets per property when building the
    /// histogram for tree splits.
    /// Effort interaction: 32 at e<=4, 48 at e5, 64 at e6, 96 at e7,
    /// 128 at e8, 256 at e9+. Higher = finer thresholds at higher cost.
    pub tree_max_buckets: Option<u16>,

    /// Number of MA-tree decision properties to evaluate per split.
    /// Effort interaction: 3 at e<=4, 4 at e5, 5 at e6, 7 at e7, 10 at e8,
    /// 16 at e9+.
    pub tree_num_properties: Option<u8>,

    /// Base entropy-cost threshold a candidate split must beat to be
    /// accepted (libjxl `75 + 14 * speed_tier`, with
    /// `speed_tier = 10 - effort` per this file's unit tests). Lower =
    /// more splits = larger tree.
    pub tree_threshold_base: Option<f32>,

    /// Fraction of total pixels to sample for tree learning (when
    /// `tree_max_samples_fixed` is `0`). Floor of 65,536 samples.
    /// Effort interaction: 0.15 at e<=4, 0.25 at e5, 0.35 at e6, 0.5 at e7,
    /// 0.55 at e8, 0.65 at e9+ (libjxl PR #4236).
    pub tree_sample_fraction: Option<f32>,

    /// Hard cap on samples drawn for tree learning when set; `0` defers
    /// to [`Self::tree_sample_fraction`].
    /// Effort interaction: 65,000 at e<=4, 0 at e>=5.
    pub tree_max_samples_fixed: Option<u32>,
}
840
#[cfg(feature = "__expert")]
impl LossyInternalParams {
    /// Overlay every populated override onto `profile`.
    ///
    /// A field holding `Some(value)` replaces the matching `profile` field;
    /// a field holding `None` leaves the existing `profile` value as it was.
    pub(crate) fn apply_to(self, profile: &mut EffortProfile) {
        // Destructure without `..` so that adding a field to the struct and
        // forgetting to handle it here fails to compile.
        let Self {
            try_dct16,
            try_dct32,
            try_dct64,
            try_dct4x8_afv,
            fine_grained_step,
            k_info_loss_mul_base,
            entropy_mul_table,
            cfl_two_pass,
            chromacity_adjustment,
            patch_ref_tree_learning,
            non_aligned_eval,
            enhanced_clustering_vardct,
            k_ac_quant,
        } = self;

        // `Copy` knobs: take the override, or fall back to the current value.
        profile.try_dct16 = try_dct16.unwrap_or(profile.try_dct16);
        profile.try_dct32 = try_dct32.unwrap_or(profile.try_dct32);
        profile.try_dct64 = try_dct64.unwrap_or(profile.try_dct64);
        profile.try_dct4x8_afv = try_dct4x8_afv.unwrap_or(profile.try_dct4x8_afv);
        profile.fine_grained_step = fine_grained_step.unwrap_or(profile.fine_grained_step);
        profile.k_info_loss_mul_base = k_info_loss_mul_base.unwrap_or(profile.k_info_loss_mul_base);

        // The table is moved in rather than copied, so it keeps the `if let` form.
        if let Some(table) = entropy_mul_table {
            profile.entropy_mul_table = table;
        }

        profile.cfl_two_pass = cfl_two_pass.unwrap_or(profile.cfl_two_pass);
        profile.chromacity_adjustment = chromacity_adjustment.unwrap_or(profile.chromacity_adjustment);
        profile.patch_ref_tree_learning =
            patch_ref_tree_learning.unwrap_or(profile.patch_ref_tree_learning);
        profile.non_aligned_eval = non_aligned_eval.unwrap_or(profile.non_aligned_eval);
        profile.enhanced_clustering_vardct =
            enhanced_clustering_vardct.unwrap_or(profile.enhanced_clustering_vardct);
        profile.k_ac_quant = k_ac_quant.unwrap_or(profile.k_ac_quant);
    }
}
901
#[cfg(feature = "__expert")]
impl LosslessInternalParams {
    /// Overlay every populated override onto `profile`.
    ///
    /// A field holding `Some(value)` replaces the matching `profile` field;
    /// a field holding `None` leaves the existing `profile` value as it was.
    pub(crate) fn apply_to(self, profile: &mut EffortProfile) {
        // Destructure without `..` so that adding a field to the struct and
        // forgetting to handle it here fails to compile.
        let Self {
            nb_rcts_to_try,
            wp_num_param_sets,
            tree_max_buckets,
            tree_num_properties,
            tree_threshold_base,
            tree_sample_fraction,
            tree_max_samples_fixed,
        } = self;

        // All knobs are `Copy`: take the override, or fall back to the current value.
        profile.nb_rcts_to_try = nb_rcts_to_try.unwrap_or(profile.nb_rcts_to_try);
        profile.wp_num_param_sets = wp_num_param_sets.unwrap_or(profile.wp_num_param_sets);
        profile.tree_max_buckets = tree_max_buckets.unwrap_or(profile.tree_max_buckets);
        profile.tree_num_properties = tree_num_properties.unwrap_or(profile.tree_num_properties);
        profile.tree_threshold_base = tree_threshold_base.unwrap_or(profile.tree_threshold_base);
        profile.tree_sample_fraction = tree_sample_fraction.unwrap_or(profile.tree_sample_fraction);
        profile.tree_max_samples_fixed =
            tree_max_samples_fixed.unwrap_or(profile.tree_max_samples_fixed);
    }
}
938
// Unit tests pinning the effort-derived values of `EffortProfile` and the
// contents of `EntropyMulTable` for the reference and experimental modes.
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the reference lossy profile at effort 7.
    #[test]
    fn test_lossy_reference_e7() {
        let p = EffortProfile::lossy(7, EncoderMode::Reference);
        assert_eq!(p.effort, 7);
        assert!(p.use_ans);
        assert!(p.optimize_codes);
        assert!(p.custom_orders);
        assert!(p.gaborish);
        assert!(p.pixel_domain_loss);
        assert!(!p.error_diffusion);
        assert!(p.patches);
        assert!(!p.lz77); // libjxl only enables LZ77 for VarDCT at e9+ (kTortoise)
        assert_eq!(p.butteraugli_iters, 0); // libjxl gates at speed_tier <= kKitten (e8+)
        assert!(p.ac_strategy_enabled);
        assert!(p.try_dct32);
        assert!(p.try_dct64);
        assert!(p.try_dct4x8_afv); // e6+
        assert!(p.non_aligned_eval);
        assert_eq!(p.fine_grained_step, 2);
        assert!(p.chromacity_adjustment); // e7+
        assert!(!p.enhanced_clustering_vardct); // e9+
        assert!(!p.optimize_uint_configs_vardct); // e9+ (libjxl kNone at e<9)
        assert!(p.epf_dynamic_sharpness); // e6+
        assert!(p.cfl_two_pass); // e7+
        assert!(p.cfl_newton); // e7+ with pass 2
        assert!(p.use_adaptive_quant);
        assert!(p.adjust_quant_ac);
        assert_eq!(p.initial_q_numerator, 0.39);
        assert_eq!(p.k_favor_2x2, -0.4);
        assert_eq!(p.k_ac_quant, 0.765);
        assert_eq!(p.nb_rcts_to_try, 7);
        assert_eq!(p.wp_num_param_sets, 0); // e8+
        assert_eq!(p.tree_num_properties, 7);
        assert_eq!(p.tree_max_buckets, 96);
    }

    /// Pins the reference lossy profile at effort 5, where the e6+ and e7+
    /// features are expected to be off.
    #[test]
    fn test_lossy_reference_e5() {
        let p = EffortProfile::lossy(5, EncoderMode::Reference);
        assert_eq!(p.effort, 5);
        assert!(p.use_ans);
        assert!(p.gaborish);
        assert!(p.pixel_domain_loss);
        assert!(!p.error_diffusion); // also off at e7 (see test_lossy_reference_e7)
        assert!(!p.patches); // e7+
        assert!(!p.lz77); // e9+ for VarDCT
        assert!(p.ac_strategy_enabled);
        assert!(p.try_dct32);
        assert!(!p.try_dct64); // e7+
        assert!(!p.try_dct4x8_afv); // e6+
        assert!(!p.non_aligned_eval); // e6+
        assert!(!p.chromacity_adjustment); // e7+
        assert!(!p.enhanced_clustering_vardct); // e9+
        assert!(!p.optimize_uint_configs_vardct); // e9+
        assert!(!p.epf_dynamic_sharpness); // e6+
        assert!(!p.cfl_two_pass); // e7+
        assert!(!p.cfl_newton); // e7+
        assert!(p.use_adaptive_quant);
        assert!(p.adjust_quant_ac);
        assert_eq!(p.initial_q_numerator, 0.39);
        assert_eq!(p.butteraugli_iters, 0); // libjxl gates at speed_tier <= kKitten (e8+)
        assert_eq!(p.nb_rcts_to_try, 4);
        assert_eq!(p.wp_num_param_sets, 0); // e8+
    }

    /// Pins the e9-only features of the reference lossy profile.
    #[test]
    fn test_lossy_reference_e9() {
        let p = EffortProfile::lossy(9, EncoderMode::Reference);
        assert!(p.lz77); // VarDCT LZ77 enabled at e9+ (kTortoise)
        assert_eq!(p.lz77_method, Lz77Method::Optimal);
        assert_eq!(p.butteraugli_iters, 4);
        assert_eq!(p.fine_grained_step, 1);
        assert!(p.enhanced_clustering_vardct); // e9+
        assert!(p.optimize_uint_configs_vardct); // e9+
        assert_eq!(p.nb_rcts_to_try, 19);
        assert_eq!(p.wp_num_param_sets, 5); // e9+
        assert_eq!(p.tree_num_properties, 16);
        assert_eq!(p.tree_max_buckets, 256);
    }

    /// Pins the reference lossy profile at effort 8, one step below the
    /// e9-only features.
    #[test]
    fn test_lossy_reference_e8() {
        let p = EffortProfile::lossy(8, EncoderMode::Reference);
        assert!(!p.lz77); // libjxl only enables LZ77 for VarDCT at e9+
        assert_eq!(p.lz77_method, Lz77Method::Rle);
        assert_eq!(p.butteraugli_iters, 2);
        assert_eq!(p.fine_grained_step, 2);
        assert!(!p.enhanced_clustering_vardct); // e9+
        assert!(!p.optimize_uint_configs_vardct); // e9+
        assert_eq!(p.wp_num_param_sets, 2); // e8
    }

    /// Pins the low-effort reference lossy profile at effort 3.
    #[test]
    fn test_lossy_reference_e3() {
        let p = EffortProfile::lossy(3, EncoderMode::Reference);
        assert!(p.use_ans);
        assert!(p.optimize_codes);
        assert!(!p.gaborish);
        assert!(!p.ac_strategy_enabled);
        assert!(!p.use_adaptive_quant);
        assert!(!p.adjust_quant_ac);
        assert_eq!(p.initial_q_numerator, 0.79);
    }

    /// Pins the reference lossless profile at effort 7; the VarDCT-only
    /// flags are expected to be off.
    #[test]
    fn test_lossless_reference_e7() {
        let p = EffortProfile::lossless(7, EncoderMode::Reference);
        assert!(p.use_ans);
        assert!(p.tree_learning);
        assert!(p.lz77);
        assert_eq!(p.lz77_method, Lz77Method::Rle);
        assert!(p.patches);
        assert!(!p.gaborish); // N/A
        assert!(!p.pixel_domain_loss); // N/A
        assert!(!p.ac_strategy_enabled); // N/A
    }

    /// Pins the reference lossless profile at effort 4.
    #[test]
    fn test_lossless_reference_e4() {
        let p = EffortProfile::lossless(4, EncoderMode::Reference);
        assert!(p.use_ans);
        assert!(!p.tree_learning); // e7+
        assert!(!p.lz77); // e7+
        assert!(!p.patches); // e5+
    }

    /// Out-of-range efforts are stored clamped to 1..=10.
    #[test]
    fn test_effort_clamp() {
        let p = EffortProfile::lossy(0, EncoderMode::Reference);
        assert_eq!(p.effort, 1);
        let p = EffortProfile::lossy(99, EncoderMode::Reference);
        assert_eq!(p.effort, 10);
    }

    /// Experimental lossy mode agrees with reference on the shared fields
    /// checked below and differs only in the documented knobs.
    #[test]
    fn test_experimental_diverges_from_reference() {
        // Experimental should share effort/feature-flag structure with reference
        for effort in 1..=10 {
            let r = EffortProfile::lossy(effort, EncoderMode::Reference);
            let e = EffortProfile::lossy(effort, EncoderMode::Experimental);
            assert_eq!(r.effort, e.effort);
            assert_eq!(r.use_ans, e.use_ans);
            assert_eq!(r.k_favor_2x2, e.k_favor_2x2);
            assert_eq!(r.butteraugli_iters, e.butteraugli_iters);
            assert_eq!(r.nb_rcts_to_try, e.nb_rcts_to_try);
        }

        // Verify specific divergences at effort 7
        let r = EffortProfile::lossy(7, EncoderMode::Reference);
        let e = EffortProfile::lossy(7, EncoderMode::Experimental);

        // k_info_loss_mul_base: 1.2 → 1.3 (PR #4506)
        assert_eq!(r.k_info_loss_mul_base, 1.2);
        assert_eq!(e.k_info_loss_mul_base, 1.3);

        // entropy_mul_table: PR #4506 rebalancing
        assert_eq!(r.entropy_mul_table.dct4x4, 1.08);
        assert_eq!(e.entropy_mul_table.dct4x4, 0.88);
        assert_eq!(r.entropy_mul_table.identity, 1.0428);
        assert_eq!(e.entropy_mul_table.identity, 0.88);
        assert_eq!(r.entropy_mul_table.afv, 0.817_794_9);
        assert_eq!(e.entropy_mul_table.afv, 0.75);
        // Unchanged values should match
        assert_eq!(r.entropy_mul_table.dct8, e.entropy_mul_table.dct8);
        assert_eq!(r.entropy_mul_table.dct16x8, e.entropy_mul_table.dct16x8);
        assert_eq!(r.entropy_mul_table.dct32x32, e.entropy_mul_table.dct32x32);

        // enhanced_clustering_vardct: e9+ → e7+ in experimental
        assert!(!r.enhanced_clustering_vardct); // reference e7: off
        assert!(e.enhanced_clustering_vardct); // experimental e7: on

        // patch_ref_tree_learning: false → true at e7+
        assert!(!r.patch_ref_tree_learning);
        assert!(e.patch_ref_tree_learning);

        // At effort 5, experimental should NOT enable the e7+ features
        let e5 = EffortProfile::lossy(5, EncoderMode::Experimental);
        assert!(!e5.enhanced_clustering_vardct);
        assert!(!e5.patch_ref_tree_learning);
        // But should still have the entropy_mul and info_loss_mul changes
        assert_eq!(e5.k_info_loss_mul_base, 1.3);
        assert_eq!(e5.entropy_mul_table.dct4x4, 0.88);
    }

    /// Pins every field of `EntropyMulTable::reference()` checked below.
    #[test]
    fn test_entropy_mul_table_reference_values() {
        // Verify all reference values match libjxl enc_ac_strategy.cc:584
        let t = EntropyMulTable::reference();
        assert_eq!(t.dct8, 0.8);
        assert_eq!(t.dct4x4, 1.08);
        assert_eq!(t.dct4x8, 0.859_316_37);
        assert_eq!(t.identity, 1.0428);
        assert_eq!(t.dct2x2, 0.95);
        assert_eq!(t.afv, 0.817_794_9);
        assert_eq!(t.dct16x8, 1.21);
        assert_eq!(t.dct16x16, 1.34);
        assert_eq!(t.dct16x32, 1.49);
        assert_eq!(t.dct32x32, 1.48);
        assert_eq!(t.dct64x32, 2.25);
        assert_eq!(t.dct64x64, 2.25);
    }

    /// `EntropyMulTable::experimental()` changes only `dct4x4`, `identity`
    /// and `afv` among the fields checked below.
    #[test]
    fn test_entropy_mul_table_experimental_values() {
        // Verify PR #4506 changes and that unchanged values are preserved
        let t = EntropyMulTable::experimental();
        let r = EntropyMulTable::reference();

        // Changed values (PR #4506)
        assert_eq!(t.dct4x4, 0.88); // was 1.08
        assert_eq!(t.identity, 0.88); // was 1.0428
        assert_eq!(t.afv, 0.75); // was 0.817_794_9

        // Unchanged values
        assert_eq!(t.dct8, r.dct8);
        assert_eq!(t.dct4x8, r.dct4x8);
        assert_eq!(t.dct2x2, r.dct2x2);
        assert_eq!(t.dct16x8, r.dct16x8);
        assert_eq!(t.dct16x16, r.dct16x16);
        assert_eq!(t.dct16x32, r.dct16x32);
        assert_eq!(t.dct32x32, r.dct32x32);
        assert_eq!(t.dct64x32, r.dct64x32);
        assert_eq!(t.dct64x64, r.dct64x64);
    }

    /// Spot-checks four fields of the lossless profile for equality between
    /// the two modes at every effort; it is not a full-struct comparison.
    #[test]
    fn test_lossless_experimental_matches_reference() {
        // Lossless experimental is currently identical to reference
        for effort in 1..=10 {
            let r = EffortProfile::lossless(effort, EncoderMode::Reference);
            let e = EffortProfile::lossless(effort, EncoderMode::Experimental);
            assert_eq!(r.effort, e.effort);
            assert_eq!(r.use_ans, e.use_ans);
            assert_eq!(r.tree_learning, e.tree_learning);
            assert_eq!(r.lz77, e.lz77);
        }
    }

    /// Checks `tree_threshold_base` against `75 + 14 * speed_tier` at
    /// efforts 7, 9 and 5.
    #[test]
    fn test_tree_threshold_base_formula() {
        // speed_tier = 10 - effort
        // threshold = 75 + 14 * speed_tier
        let p = EffortProfile::lossy(7, EncoderMode::Reference);
        assert_eq!(p.tree_threshold_base, 75.0 + 14.0 * 3.0); // speed_tier=3
        let p = EffortProfile::lossy(9, EncoderMode::Reference);
        assert_eq!(p.tree_threshold_base, 75.0 + 14.0 * 1.0); // speed_tier=1
        let p = EffortProfile::lossy(5, EncoderMode::Reference);
        assert_eq!(p.tree_threshold_base, 75.0 + 14.0 * 5.0); // speed_tier=5
    }
}