Skip to main content

gamut_webp/vp8/
frame.rs

1//! VP8 key-frame reconstruction pipeline (RFC 6386 §10–§14): the macroblock loop that ties together
2//! prediction, the transforms, quantization, and token coding into an encodable/decodable frame.
3//!
4//! This is the keystone of the lossy path. Each macroblock is predicted from the **reconstructed**
5//! neighbors in a recon buffer (the encoder predicts exactly as the decoder does), so the encoder's
6//! reconstruction is bit-identical to any conformant decoder's output. Luma uses whole-block 16×16
7//! DC/V/H/TM **or** per-4×4 `B_PRED` (ten directional submodes), and chroma whole-block 8×8 DC/V/H/TM;
8//! the encoder picks the lowest-SAD candidate per macroblock. A whole-block macroblock carries a Y2
9//! (luma-DC WHT) block; a `B_PRED` one codes luma DC inline (plane 3). The reconstruction is deblocked
10//! by the simple or normal loop filter as a final pass. Tokens may be split across 1/2/4/8 partitions
11//! by macroblock row, and all-zero macroblocks are coded as skipped. Per-macroblock loop-filter
12//! adjustments are the remaining VP8 header feature. STATUS.md section L.
13
14// The macroblock/block math indexes several fixed-size arrays in lock-step (and over partial ranges
15// like `1..16`), where explicit indices read closer to the spec than iterator adaptors.
16#![allow(clippy::needless_range_loop)]
17
18use gamut_color::{Yuv420, clip_pixel8};
19use gamut_core::{Error, Result};
20
21use super::bool_coder::{BoolDecoder, BoolEncoder};
22use super::header::{
23    self, LoopFilterParams, QuantIndices, Segmentation, UNCOMPRESSED_CHUNK_LEN, Vp8FrameHeader,
24};
25use super::loop_filter;
26use super::prediction::{self, B_DC_PRED, B_PRED, DC_PRED, H_PRED, NUM_BMODES, TM_PRED, V_PRED};
27use super::quant::{self, QuantFactors};
28use super::tokens::{self, CoeffProbs};
29use super::transform::{fdct4x4, fwht4x4, idct4x4, iwht4x4};
30
/// The whole-block prediction modes the encoder considers, in signaling order. Both the 16×16 luma
/// and the 8×8 chroma mode searches iterate this list front to back, so on a SAD tie the earlier
/// entry wins.
const WHOLE_BLOCK_MODES: [usize; 4] = [DC_PRED, V_PRED, H_PRED, TM_PRED];

/// SAD margin by which per-subblock `B_PRED` must beat the best whole-block mode to be chosen — a
/// coarse stand-in for `B_PRED`'s extra mode-signaling cost (true rate-distortion search is issue
/// #32). Expressed in the same units as the summed absolute pixel differences it is compared with.
const BPRED_SAD_PENALTY: u32 = 160;

/// Segment-id coding tree (RFC 6386 §10 `mb_segment_tree`): four leaves over two boolean decisions.
/// NOTE(review): presumably follows the RFC tree-array convention (positive entries index the next
/// node pair, non-positive entries are negated leaf values) — confirm against the tree coder.
const MB_SEGMENT_TREE: &[i8] = &[2, 4, 0, -1, -2, -3];

/// Per-segment quantizer deltas the encoder assigns (delta mode) when segmentation is enabled — a
/// coarse spread so distinct macroblock regions get distinct quantizers (refinement is issue #32).
/// One entry per segment id, 0 through 3.
const SEGMENT_QUANT_DELTAS: [i8; 4] = [-12, -4, 4, 12];
44
/// Encoder feature toggles for a frame. Defaults to the normal loop filter, no segmentation, and a
/// single token partition.
///
/// Derives `Debug`, `PartialEq`, and `Eq` so option sets can be logged and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EncodeOptions {
    /// Use the simple loop filter instead of the normal one.
    pub simple_filter: bool,
    /// Emit four quantizer segments, assigned per macroblock by luma mean.
    pub segmented: bool,
    /// Number of DCT token partitions (1, 2, 4, or 8); macroblock rows are assigned round-robin.
    pub partitions: u8,
}

impl Default for EncodeOptions {
    /// Normal loop filter, segmentation off, one token partition. Written by hand because the
    /// partition count defaults to 1 rather than the derived 0.
    fn default() -> Self {
        Self {
            simple_filter: false,
            segmented: false,
            partitions: 1,
        }
    }
}
66
67/// The clamped base quantizer index for segment `s` (RFC 6386 §9.3/§10): the absolute or
68/// delta-adjusted value when segmentation is enabled, else the frame base.
69fn segment_q_index(seg: &Segmentation, base_y_ac: u8, s: usize) -> i32 {
70    if !seg.enabled {
71        return i32::from(base_y_ac);
72    }
73    let q = if seg.abs_delta {
74        i32::from(seg.quantizer[s])
75    } else {
76        i32::from(base_y_ac) + i32::from(seg.quantizer[s])
77    };
78    q.clamp(0, 127)
79}
80
81/// The four per-segment quantizer factor sets for a frame (all equal when segmentation is disabled).
82fn segment_quant_factors(header: &Vp8FrameHeader) -> [QuantFactors; 4] {
83    core::array::from_fn(|s| {
84        let base_q = segment_q_index(&header.segmentation, header.quant.y_ac, s);
85        QuantFactors::new(base_q, &header.quant)
86    })
87}
88
/// Average of the 256 luma samples of macroblock `(mb_x, mb_y)` in a `stride`-wide plane, rounded
/// down. Used to assign the macroblock's segment.
///
/// Panics if the macroblock extends past the end of `src`.
fn mb_luma_mean(src: &[u8], stride: usize, mb_x: usize, mb_y: usize) -> u32 {
    let origin = mb_y * 16 * stride + mb_x * 16;
    let total: u32 = (0..16)
        .map(|row| {
            let start = origin + row * stride;
            src[start..start + 16]
                .iter()
                .map(|&sample| u32::from(sample))
                .sum::<u32>()
        })
        .sum();
    total / 256
}
100
/// Per-macroblock-column entropy context: whether the prior block in each position carried at least
/// one non-zero coefficient (RFC 6386 §13.3). A single instance also serves as the running "left"
/// context, reset at the start of each macroblock row.
///
/// The token coders add the matching `above` and `left` flags to form a block's context index
/// (0, 1, or 2) and then overwrite both flags with the block's own non-zero result.
#[derive(Clone, Copy, Default)]
struct EntropyCtx {
    /// Y2 (luma-DC WHT) block. Left untouched by `B_PRED` macroblocks, which carry no Y2 block.
    y2: bool,
    /// The four luma sub-block columns (above) / rows (left).
    y: [bool; 4],
    /// The two U sub-block columns / rows.
    u: [bool; 2],
    /// The two V sub-block columns / rows.
    v: [bool; 2],
}
115
/// One macroblock's quantized coefficient levels: the Y2 block, 16 luma sub-blocks, 4 U and 4 V.
/// Sub-blocks are stored in raster order within the macroblock; the default value is all zeros.
#[derive(Clone, Default)]
struct MbLevels {
    /// Luma-DC (WHT) block; used only by whole-block macroblocks.
    y2: [i16; 16],
    /// The 16 luma 4×4 sub-blocks. For whole-block macroblocks only positions 1..16 are quantized
    /// here (the DC travels in `y2`); `B_PRED` luma codes all 16 positions.
    y: [[i16; 16]; 16],
    /// The four U 4×4 sub-blocks (DC and AC).
    u: [[i16; 16]; 4],
    /// The four V 4×4 sub-blocks (DC and AC).
    v: [[i16; 16]; 4],
}
124
/// Macroblock-aligned reconstructed YUV planes (luma `mb_cols*16 × mb_rows*16`, chroma half each).
///
/// Derives `Debug` and `Clone` so a reconstruction can be inspected or snapshotted by callers.
#[derive(Debug, Clone)]
pub struct FrameBuffers {
    /// Visible frame width in pixels (before macroblock alignment).
    width: u32,
    /// Visible frame height in pixels (before macroblock alignment).
    height: u32,
    /// Number of macroblock columns covering `width`.
    mb_cols: usize,
    /// Number of macroblock rows covering `height`.
    mb_rows: usize,
    /// Luma plane, `mb_cols * 16` samples per row.
    y: Vec<u8>,
    /// U chroma plane, `mb_cols * 8` samples per row.
    u: Vec<u8>,
    /// V chroma plane, `mb_cols * 8` samples per row.
    v: Vec<u8>,
}
135
136impl FrameBuffers {
137    fn new(width: u32, height: u32) -> Self {
138        let mb_cols = (width as usize).div_ceil(16);
139        let mb_rows = (height as usize).div_ceil(16);
140        Self {
141            width,
142            height,
143            mb_cols,
144            mb_rows,
145            y: vec![0u8; mb_cols * 16 * mb_rows * 16],
146            u: vec![0u8; mb_cols * 8 * mb_rows * 8],
147            v: vec![0u8; mb_cols * 8 * mb_rows * 8],
148        }
149    }
150
151    fn y_stride(&self) -> usize {
152        self.mb_cols * 16
153    }
154
155    fn c_stride(&self) -> usize {
156        self.mb_cols * 8
157    }
158
159    /// Crops the reconstruction to a visible-resolution [`Yuv420`].
160    #[must_use]
161    pub fn to_yuv420(&self) -> Yuv420 {
162        let (w, h) = (self.width as usize, self.height as usize);
163        let (cw, ch) = (
164            Yuv420::chroma_width(self.width) as usize,
165            Yuv420::chroma_height(self.height) as usize,
166        );
167        let crop = |plane: &[u8], stride: usize, pw: usize, ph: usize| {
168            let mut out = vec![0u8; pw * ph];
169            for row in 0..ph {
170                out[row * pw..row * pw + pw]
171                    .copy_from_slice(&plane[row * stride..row * stride + pw]);
172            }
173            out
174        };
175        let y = crop(&self.y, self.y_stride(), w, h);
176        let u = crop(&self.u, self.c_stride(), cw, ch);
177        let v = crop(&self.v, self.c_stride(), cw, ch);
178        Yuv420::new(self.width, self.height, y, u, v).expect("cropped planes match dimensions")
179    }
180}
181
/// Picks a loop-filter strength from the base quantizer — stronger quantization deblocks harder. A
/// coarse heuristic (true filter-level selection is part of issue #32); a level of 0 disables it.
///
/// The result is half the quantizer index, capped at 63 (the same upper bound
/// `segment_filter_level` clamps to), so an out-of-range index above 127 can never yield a level
/// the header cannot represent. For valid indices (0..=127) the cap is never reached.
fn filter_level(quant_index: u8) -> u8 {
    (quant_index / 2).min(63)
}
187
188/// The clamped loop-filter level for segment `s` (RFC 6386 §10/§15.4): the segment's absolute or
189/// delta-adjusted filter strength when segmentation is enabled, else the frame base level.
190fn segment_filter_level(base: u8, seg: &Segmentation, s: usize) -> u8 {
191    if !seg.enabled {
192        return base;
193    }
194    let level = if seg.abs_delta {
195        i32::from(seg.filter_strength[s])
196    } else {
197        i32::from(base) + i32::from(seg.filter_strength[s])
198    };
199    level.clamp(0, 63) as u8
200}
201
202/// Applies the frame's configured loop filter to the reconstruction as a final whole-frame pass: the
203/// simple filter deblocks luma only, the normal filter luma and chroma. Each macroblock is filtered at
204/// its segment's level (uniform when segmentation is disabled); an all-zero level set is a no-op.
205fn apply_loop_filter(
206    recon: &mut FrameBuffers,
207    lf: &LoopFilterParams,
208    seg: &Segmentation,
209    segment_map: &[usize],
210    filter_interior: &[bool],
211) {
212    let mb_level: Vec<u8> = segment_map
213        .iter()
214        .map(|&s| segment_filter_level(lf.level, seg, s))
215        .collect();
216    if mb_level.iter().all(|&l| l == 0) {
217        return;
218    }
219    let (ys, cs, mbc, mbr) = (
220        recon.y_stride(),
221        recon.c_stride(),
222        recon.mb_cols,
223        recon.mb_rows,
224    );
225    if lf.simple {
226        loop_filter::simple_filter_luma(
227            &mut recon.y,
228            ys,
229            mbc,
230            mbr,
231            &mb_level,
232            lf.sharpness,
233            filter_interior,
234        );
235    } else {
236        loop_filter::normal_filter(
237            &mut recon.y,
238            &mut recon.u,
239            &mut recon.v,
240            ys,
241            cs,
242            mbc,
243            mbr,
244            &mb_level,
245            lf.sharpness,
246            filter_interior,
247        );
248    }
249}
250
251/// Whether a macroblock carries any non-zero quantized coefficient — the second half of the
252/// loop-filter interior-edge skip rule (RFC 6386 §15.1).
253fn mb_has_coeffs(levels: &MbLevels) -> bool {
254    levels.y2.iter().any(|&x| x != 0)
255        || levels.y.iter().flatten().any(|&x| x != 0)
256        || levels.u.iter().flatten().any(|&x| x != 0)
257        || levels.v.iter().flatten().any(|&x| x != 0)
258}
259
260/// Builds the minimal key-frame header for the given dimensions, base quantizer, and filter type.
261fn frame_header(width: u32, height: u32, quant_index: u8, simple_filter: bool) -> Vp8FrameHeader {
262    Vp8FrameHeader {
263        width: width as u16,
264        height: height as u16,
265        horizontal_scale: 0,
266        vertical_scale: 0,
267        version: 0,
268        color_space: 0,
269        clamp_required: true,
270        segmentation: Segmentation::default(),
271        loop_filter: LoopFilterParams {
272            simple: simple_filter,
273            level: filter_level(quant_index),
274            sharpness: 0,
275        },
276        token_partitions: 1,
277        quant: QuantIndices {
278            y_ac: quant_index,
279            ..QuantIndices::default()
280        },
281        refresh_entropy_probs: true,
282        // Enable per-macroblock skip coding. The skip-false probability falls with the quantizer,
283        // since coarser quantization yields more all-zero (skippable) macroblocks.
284        mb_no_skip_coeff: true,
285        prob_skip_false: (255 - quant_index).max(1),
286    }
287}
288
289/// Resets a macroblock's coefficient context to "no non-zero coefficients" for a skipped macroblock
290/// (RFC 6386 §11.1): equivalent to coding all-zero blocks, but the `B_PRED` Y2 context persists since
291/// such a macroblock carries no Y2 block.
292fn clear_mb_context(above: &mut EntropyCtx, left: &mut EntropyCtx, is_bpred: bool) {
293    if !is_bpred {
294        above.y2 = false;
295        left.y2 = false;
296    }
297    above.y = [false; 4];
298    left.y = [false; 4];
299    above.u = [false; 2];
300    left.u = [false; 2];
301    above.v = [false; 2];
302    left.v = [false; 2];
303}
304
305/// Reconstructs a skipped `B_PRED` macroblock's luma: each subblock is its prediction with no residual
306/// (the encoder's all-zero-coefficient reconstruction).
307fn reconstruct_bpred_zero(
308    recon: &mut FrameBuffers,
309    mb_x: usize,
310    mb_y: usize,
311    sub_modes: &[usize; 16],
312    above_right: &[u8; 4],
313) {
314    let (px, py, rstride) = (mb_x * 16, mb_y * 16, recon.y_stride());
315    for i in 0..16 {
316        let (r, c) = (i / 4, i % 4);
317        let (sx, sy) = (px + c * 4, py + r * 4);
318        let (a, l, corner) = subblock_neighbors(recon, sx, sy, c, above_right);
319        let pred = prediction::subblock_predict(sub_modes[i], &a, &l, corner);
320        let pred_i16: [i16; 16] = core::array::from_fn(|k| i16::from(pred[k]));
321        write_block(&mut recon.y, rstride, sx, sy, &pred_i16, &[0i16; 16]);
322    }
323}
324
/// Replicates `src` (`sw × sh`) into a `dw × dh` plane, extending the right and bottom edges.
///
/// Destination pixel `(x, y)` takes the source pixel at `(min(x, sw - 1), min(y, sh - 1))`, so a
/// destination narrower or shorter than the source simply crops it. If the source has no pixels
/// (`sw == 0` or `sh == 0`) there is nothing to replicate and the zero-filled destination is
/// returned instead of underflowing on `sw - 1` / `sh - 1`.
///
/// Panics if `src` is too short for the rows and columns that are read.
fn pad_plane(src: &[u8], sw: usize, sh: usize, dw: usize, dh: usize) -> Vec<u8> {
    let mut dst = vec![0u8; dw * dh];
    if sw == 0 || sh == 0 || dw == 0 {
        return dst;
    }
    let copy_w = sw.min(dw);
    for (y, out_row) in dst.chunks_exact_mut(dw).enumerate() {
        // Rows past the bottom of the source repeat its last row.
        let row_start = y.min(sh - 1) * sw;
        out_row[..copy_w].copy_from_slice(&src[row_start..row_start + copy_w]);
        if dw > sw {
            // Columns past the right edge repeat the row's last pixel.
            out_row[sw..].fill(src[row_start + sw - 1]);
        }
    }
    dst
}
336
/// Copies the `n` horizontally adjacent pixels starting at `(x, y)` of `plane` into the front of a
/// 16-entry buffer; entries from `n` onward stay zero.
///
/// Panics if `n > 16` or the row segment lies outside `plane`.
fn row_at(plane: &[u8], stride: usize, x: usize, y: usize, n: usize) -> [u8; 16] {
    let start = y * stride + x;
    let mut out = [0u8; 16];
    for (slot, &pixel) in out[..n].iter_mut().zip(&plane[start..start + n]) {
        *slot = pixel;
    }
    out
}
343
/// Copies the `n` vertically adjacent pixels starting at `(x, y)` of `plane` into the front of a
/// 16-entry buffer; entries from `n` onward stay zero.
///
/// Panics if `n > 16` or any of the pixels lies outside `plane`.
fn col_at(plane: &[u8], stride: usize, x: usize, y: usize, n: usize) -> [u8; 16] {
    let mut out = [0u8; 16];
    for row in 0..n {
        out[row] = plane[(y + row) * stride + x];
    }
    out
}
352
/// Loads the 4×4 block whose top-left corner is `(x, y)` in `plane`, widening each sample to `i16`.
/// The result is in raster order (index `row * 4 + col`).
fn read_block(plane: &[u8], stride: usize, x: usize, y: usize) -> [i16; 16] {
    let mut out = [0i16; 16];
    for (row, dst_row) in out.chunks_exact_mut(4).enumerate() {
        let start = (y + row) * stride + x;
        for (dst, &sample) in dst_row.iter_mut().zip(&plane[start..start + 4]) {
            *dst = i16::from(sample);
        }
    }
    out
}
363
/// Pulls the 4×4 sub-block at `(sub_x, sub_y)` out of a `stride`-wide prediction block, widening
/// each sample to `i16`. The result is in raster order (index `row * 4 + col`).
fn sub_pred(pred: &[u8], stride: usize, sub_x: usize, sub_y: usize) -> [i16; 16] {
    let mut out = [0i16; 16];
    for (k, slot) in out.iter_mut().enumerate() {
        let (row, col) = (k / 4, k % 4);
        *slot = i16::from(pred[(sub_y + row) * stride + sub_x + col]);
    }
    out
}
374
375/// Writes `clip_pixel8(pred + residue)` into the 4×4 block at `(x, y)` of `plane`.
376fn write_block(
377    plane: &mut [u8],
378    stride: usize,
379    x: usize,
380    y: usize,
381    pred: &[i16; 16],
382    residue: &[i16; 16],
383) {
384    for r in 0..4 {
385        for c in 0..4 {
386            let v = i32::from(pred[r * 4 + c]) + i32::from(residue[r * 4 + c]);
387            plane[(y + r) * stride + x + c] = clip_pixel8(v);
388        }
389    }
390}
391
/// Above-left corner pixel used for whole-block prediction (RFC 6386 §12.2): the constant 127 for
/// macroblocks in the top row, 129 for macroblocks in the left column of any later row, and
/// otherwise the reconstructed pixel diagonally above-left of `(px, py)`.
fn corner_pixel(plane: &[u8], stride: usize, px: usize, py: usize, mb_x: usize, mb_y: usize) -> u8 {
    match (mb_y, mb_x) {
        (0, _) => 127,
        (_, 0) => 129,
        _ => plane[(py - 1) * stride + px - 1],
    }
}
403
404/// One reconstructed luma pixel, or its off-frame edge value (127 above the frame, 129 to the left).
405fn luma_pixel(recon: &FrameBuffers, y: i32, x: i32) -> u8 {
406    if y < 0 {
407        127
408    } else if x < 0 {
409        129
410    } else {
411        recon.y[y as usize * recon.y_stride() + x as usize]
412    }
413}
414
415/// The four above-right pixels of the macroblock's top-right subblock, shared by all right-column
416/// subblocks (RFC 6386 §12.3 `copy_down`). Matching libwebp: 127 on the top row; the next
417/// macroblock's top-left four pixels normally; or the current macroblock's last above pixel
418/// replicated on the rightmost column (`frame_dec.c`: `memset(top_right, top[15])`).
419fn above_right_source(recon: &FrameBuffers, mb_x: usize, mb_y: usize) -> [u8; 4] {
420    if mb_y == 0 {
421        return [127; 4];
422    }
423    let stride = recon.y_stride();
424    let row = (mb_y * 16 - 1) * stride;
425    if mb_x + 1 >= recon.mb_cols {
426        [recon.y[row + mb_x * 16 + 15]; 4]
427    } else {
428        let base = row + mb_x * 16 + 16;
429        [
430            recon.y[base],
431            recon.y[base + 1],
432            recon.y[base + 2],
433            recon.y[base + 3],
434        ]
435    }
436}
437
438/// Gathers a 4×4 luma subblock's prediction neighbors from the in-place reconstruction: the eight
439/// above pixels `A[0..8]` (four above, four above-right), the four left `L[0..4]`, and the above-left
440/// corner. `(sx, sy)` is the subblock's top-left in frame coordinates and `c` its column within the
441/// macroblock (the right column, `c == 3`, takes its above-right from the shared `above_right`).
442fn subblock_neighbors(
443    recon: &FrameBuffers,
444    sx: usize,
445    sy: usize,
446    c: usize,
447    above_right: &[u8; 4],
448) -> ([u8; 8], [u8; 4], u8) {
449    let (xi, yi) = (sx as i32, sy as i32);
450    let corner = luma_pixel(recon, yi - 1, xi - 1);
451    let mut a = [0u8; 8];
452    for k in 0..4 {
453        a[k] = luma_pixel(recon, yi - 1, xi + k as i32);
454    }
455    if c == 3 {
456        a[4..8].copy_from_slice(above_right);
457    } else {
458        for k in 0..4 {
459            a[4 + k] = luma_pixel(recon, yi - 1, xi + 4 + k as i32);
460        }
461    }
462    let mut l = [0u8; 4];
463    for k in 0..4 {
464        l[k] = luma_pixel(recon, yi + k as i32, xi - 1);
465    }
466    (a, l, corner)
467}
468
469/// Produces the 16×16 luma prediction for macroblock `(mb_x, mb_y)` under whole-block `mode`.
470fn predict_luma(recon: &FrameBuffers, mb_x: usize, mb_y: usize, mode: usize) -> [u8; 256] {
471    let (px, py, stride) = (mb_x * 16, mb_y * 16, recon.y_stride());
472    let above = (mb_y > 0).then(|| row_at(&recon.y, stride, px, py - 1, 16));
473    let left = (mb_x > 0).then(|| col_at(&recon.y, stride, px - 1, py, 16));
474    let corner = corner_pixel(&recon.y, stride, px, py, mb_x, mb_y);
475    let mut pred = [0u8; 256];
476    prediction::predict_block(
477        mode,
478        16,
479        above.as_ref().map(|a| &a[..16]),
480        left.as_ref().map(|l| &l[..16]),
481        corner,
482        &mut pred,
483    );
484    pred
485}
486
487/// Produces the 8×8 prediction for one chroma plane under whole-block `mode`.
488fn predict_chroma(plane: &[u8], stride: usize, mb_x: usize, mb_y: usize, mode: usize) -> [u8; 64] {
489    let (px, py) = (mb_x * 8, mb_y * 8);
490    let above = (mb_y > 0).then(|| row_at(plane, stride, px, py - 1, 8));
491    let left = (mb_x > 0).then(|| col_at(plane, stride, px - 1, py, 8));
492    let corner = corner_pixel(plane, stride, px, py, mb_x, mb_y);
493    let mut pred = [0u8; 64];
494    prediction::predict_block(
495        mode,
496        8,
497        above.as_ref().map(|a| &a[..8]),
498        left.as_ref().map(|l| &l[..8]),
499        corner,
500        &mut pred,
501    );
502    pred
503}
504
/// Sum of absolute differences between an `n`×`n` prediction (`pred`, packed in raster order) and
/// the source block at block coordinates `(mb_x, mb_y)` — i.e. pixel origin `(mb_x * n, mb_y * n)`
/// — of the `stride`-wide plane `src`.
fn block_sad(pred: &[u8], src: &[u8], stride: usize, mb_x: usize, mb_y: usize, n: usize) -> u32 {
    let origin = mb_y * n * stride + mb_x * n;
    (0..n)
        .map(|row| {
            let src_start = origin + row * stride;
            let src_row = &src[src_start..src_start + n];
            let pred_row = &pred[row * n..row * n + n];
            src_row
                .iter()
                .zip(pred_row)
                .map(|(&s, &p)| u32::from(s.abs_diff(p)))
                .sum::<u32>()
        })
        .sum()
}
516
517/// Selects the lowest-SAD whole-block luma mode (a simple proxy; rate-distortion search is issue #32).
518fn select_luma_mode(
519    recon: &FrameBuffers,
520    src: &[u8],
521    stride: usize,
522    mb_x: usize,
523    mb_y: usize,
524) -> usize {
525    let mut best = (DC_PRED, u32::MAX);
526    for mode in WHOLE_BLOCK_MODES {
527        let sad = block_sad(
528            &predict_luma(recon, mb_x, mb_y, mode),
529            src,
530            stride,
531            mb_x,
532            mb_y,
533            16,
534        );
535        if sad < best.1 {
536            best = (mode, sad);
537        }
538    }
539    best.0
540}
541
542/// Selects the lowest-combined-SAD chroma mode (shared by U and V).
543fn select_chroma_mode(
544    recon: &FrameBuffers,
545    src_u: &[u8],
546    src_v: &[u8],
547    stride: usize,
548    mb_x: usize,
549    mb_y: usize,
550) -> usize {
551    let mut best = (DC_PRED, u32::MAX);
552    for mode in WHOLE_BLOCK_MODES {
553        let su = block_sad(
554            &predict_chroma(&recon.u, recon.c_stride(), mb_x, mb_y, mode),
555            src_u,
556            stride,
557            mb_x,
558            mb_y,
559            8,
560        );
561        let sv = block_sad(
562            &predict_chroma(&recon.v, recon.c_stride(), mb_x, mb_y, mode),
563            src_v,
564            stride,
565            mb_x,
566            mb_y,
567            8,
568        );
569        if su + sv < best.1 {
570            best = (mode, su + sv);
571        }
572    }
573    best.0
574}
575
576/// Reconstructs the 16 luma sub-blocks of a macroblock: the Y2 inverse-WHT supplies each sub-block's
577/// DC, the AC levels are dequantized, and `pred + idct` is written into the recon buffer. Shared by
578/// the encoder and decoder.
579fn reconstruct_luma(
580    recon: &mut FrameBuffers,
581    mb_x: usize,
582    mb_y: usize,
583    pred: &[u8; 256],
584    levels: &MbLevels,
585    qf: &QuantFactors,
586) {
587    let mut y2_dq = [0i16; 16];
588    y2_dq[0] = quant::dequantize(levels.y2[0], qf.y2_dc);
589    for k in 1..16 {
590        y2_dq[k] = quant::dequantize(levels.y2[k], qf.y2_ac);
591    }
592    let dc = iwht4x4(&y2_dq);
593
594    let stride = recon.y_stride();
595    for i in 0..16 {
596        let mut dq = [0i16; 16];
597        dq[0] = dc[i];
598        for k in 1..16 {
599            dq[k] = quant::dequantize(levels.y[i][k], qf.y1_ac);
600        }
601        let residue = idct4x4(&dq);
602        let (sc, sr) = (i % 4, i / 4);
603        write_block(
604            &mut recon.y,
605            stride,
606            mb_x * 16 + sc * 4,
607            mb_y * 16 + sr * 4,
608            &sub_pred(pred, 16, sc * 4, sr * 4),
609            &residue,
610        );
611    }
612}
613
614/// Reconstructs the four sub-blocks of one chroma plane from full (DC+AC) levels.
615fn reconstruct_chroma(
616    plane: &mut [u8],
617    stride: usize,
618    mb_x: usize,
619    mb_y: usize,
620    pred: &[u8; 64],
621    levels: &[[i16; 16]; 4],
622    qf: &QuantFactors,
623) {
624    for i in 0..4 {
625        let mut dq = [0i16; 16];
626        dq[0] = quant::dequantize(levels[i][0], qf.uv_dc);
627        for k in 1..16 {
628            dq[k] = quant::dequantize(levels[i][k], qf.uv_ac);
629        }
630        let residue = idct4x4(&dq);
631        let (sc, sr) = (i % 2, i / 2);
632        write_block(
633            plane,
634            stride,
635            mb_x * 8 + sc * 4,
636            mb_y * 8 + sr * 4,
637            &sub_pred(pred, 8, sc * 4, sr * 4),
638            &residue,
639        );
640    }
641}
642
643/// Transforms + quantizes one luma macroblock against its prediction, returning the Y2 and per
644/// sub-block AC levels.
645fn quantize_luma(
646    src: &[u8],
647    stride: usize,
648    mb_x: usize,
649    mb_y: usize,
650    pred: &[u8; 256],
651    qf: &QuantFactors,
652    levels: &mut MbLevels,
653) {
654    let mut y_coeffs = [[0i16; 16]; 16];
655    let mut y_dc = [0i16; 16];
656    for i in 0..16 {
657        let (sc, sr) = (i % 4, i / 4);
658        let block = read_block(src, stride, mb_x * 16 + sc * 4, mb_y * 16 + sr * 4);
659        let p = sub_pred(pred, 16, sc * 4, sr * 4);
660        let residue: [i16; 16] = core::array::from_fn(|k| block[k] - p[k]);
661        y_coeffs[i] = fdct4x4(&residue);
662        y_dc[i] = y_coeffs[i][0];
663    }
664    let y2_coeffs = fwht4x4(&y_dc);
665    levels.y2[0] = quant::quantize(y2_coeffs[0], qf.y2_dc);
666    for k in 1..16 {
667        levels.y2[k] = quant::quantize(y2_coeffs[k], qf.y2_ac);
668    }
669    for i in 0..16 {
670        for k in 1..16 {
671            levels.y[i][k] = quant::quantize(y_coeffs[i][k], qf.y1_ac);
672        }
673    }
674}
675
676/// Transforms + quantizes one chroma plane's four sub-blocks against its prediction.
677fn quantize_chroma(
678    src: &[u8],
679    stride: usize,
680    mb_x: usize,
681    mb_y: usize,
682    pred: &[u8; 64],
683    qf: &QuantFactors,
684) -> [[i16; 16]; 4] {
685    let mut levels = [[0i16; 16]; 4];
686    for i in 0..4 {
687        let (sc, sr) = (i % 2, i / 2);
688        let block = read_block(src, stride, mb_x * 8 + sc * 4, mb_y * 8 + sr * 4);
689        let p = sub_pred(pred, 8, sc * 4, sr * 4);
690        let residue: [i16; 16] = core::array::from_fn(|k| block[k] - p[k]);
691        let coeffs = fdct4x4(&residue);
692        levels[i][0] = quant::quantize(coeffs[0], qf.uv_dc);
693        for k in 1..16 {
694            levels[i][k] = quant::quantize(coeffs[k], qf.uv_ac);
695        }
696    }
697    levels
698}
699
700/// Encodes the luma plane of a `B_PRED` macroblock: per subblock (raster order), selects the
701/// lowest-SAD submode, quantizes the residual (plane 3 — DC included, no Y2), and reconstructs in
702/// place so the next subblock predicts from it. Returns the 16 submodes, their quantized levels, and
703/// the total prediction SAD (for the macroblock mode decision).
704fn encode_bpred_luma(
705    recon: &mut FrameBuffers,
706    src: &[u8],
707    stride: usize,
708    mb_x: usize,
709    mb_y: usize,
710    qf: &QuantFactors,
711    above_right: &[u8; 4],
712) -> ([usize; 16], [[i16; 16]; 16], u32) {
713    let (px, py, rstride) = (mb_x * 16, mb_y * 16, recon.y_stride());
714    let mut sub_modes = [B_DC_PRED; 16];
715    let mut levels = [[0i16; 16]; 16];
716    let mut total_sad = 0u32;
717    for i in 0..16 {
718        let (r, c) = (i / 4, i % 4);
719        let (sx, sy) = (px + c * 4, py + r * 4);
720        let (a, l, corner) = subblock_neighbors(recon, sx, sy, c, above_right);
721        let src_sub = read_block(src, stride, sx, sy);
722        let mut best = (B_DC_PRED, u32::MAX, [0u8; 16]);
723        for m in 0..NUM_BMODES {
724            let pred = prediction::subblock_predict(m, &a, &l, corner);
725            let sad: u32 = (0..16)
726                .map(|k| i32::from(src_sub[k]).abs_diff(i32::from(pred[k])))
727                .sum();
728            if sad < best.1 {
729                best = (m, sad, pred);
730            }
731        }
732        let (mode, sad, pred) = best;
733        sub_modes[i] = mode;
734        total_sad += sad;
735
736        let residue: [i16; 16] = core::array::from_fn(|k| src_sub[k] - i16::from(pred[k]));
737        let coeffs = fdct4x4(&residue);
738        levels[i][0] = quant::quantize(coeffs[0], qf.y1_dc);
739        for k in 1..16 {
740            levels[i][k] = quant::quantize(coeffs[k], qf.y1_ac);
741        }
742        let mut dq = [0i16; 16];
743        dq[0] = quant::dequantize(levels[i][0], qf.y1_dc);
744        for k in 1..16 {
745            dq[k] = quant::dequantize(levels[i][k], qf.y1_ac);
746        }
747        let residue = idct4x4(&dq);
748        let pred_i16: [i16; 16] = core::array::from_fn(|k| i16::from(pred[k]));
749        write_block(&mut recon.y, rstride, sx, sy, &pred_i16, &residue);
750    }
751    (sub_modes, levels, total_sad)
752}
753
754/// Decodes and reconstructs the luma plane of a `B_PRED` macroblock from its submodes and the token
755/// partition, interleaving token decode and reconstruction (each subblock predicts from the one
756/// before it) and threading the plane-3 non-zero context. Leaves the Y2 context untouched.
757#[allow(clippy::too_many_arguments)] // the reconstruction loop genuinely needs all of this state
758fn decode_bpred_luma(
759    recon: &mut FrameBuffers,
760    dec: &mut BoolDecoder,
761    above: &mut EntropyCtx,
762    left: &mut EntropyCtx,
763    probs: &CoeffProbs,
764    mb_x: usize,
765    mb_y: usize,
766    qf: &QuantFactors,
767    sub_modes: &[usize; 16],
768    above_right: &[u8; 4],
769) {
770    let (px, py, rstride) = (mb_x * 16, mb_y * 16, recon.y_stride());
771    for i in 0..16 {
772        let (r, c) = (i / 4, i % 4);
773        let (sx, sy) = (px + c * 4, py + r * 4);
774        let ctx = usize::from(above.y[c]) + usize::from(left.y[r]);
775        let mut lev = [0i16; 16];
776        let has = tokens::decode_block(dec, &mut lev, 3, ctx, probs);
777        above.y[c] = has;
778        left.y[r] = has;
779
780        let (a, l, corner) = subblock_neighbors(recon, sx, sy, c, above_right);
781        let pred = prediction::subblock_predict(sub_modes[i], &a, &l, corner);
782        let mut dq = [0i16; 16];
783        dq[0] = quant::dequantize(lev[0], qf.y1_dc);
784        for k in 1..16 {
785            dq[k] = quant::dequantize(lev[k], qf.y1_ac);
786        }
787        let residue = idct4x4(&dq);
788        let pred_i16: [i16; 16] = core::array::from_fn(|k| i16::from(pred[k]));
789        write_block(&mut recon.y, rstride, sx, sy, &pred_i16, &residue);
790    }
791}
792
/// The `(above, left)` subblock-mode context for subblock `i` (raster index 0..16) of a macroblock
/// (RFC 6386 §11.3).
///
/// The above mode is the subblock one row up inside the macroblock, or `above_col[column]` for the
/// top row. The left mode is the subblock one column left inside the macroblock, or
/// `left_col[row]` for the left column.
fn bmode_context(
    sub_modes: &[usize; 16],
    above_col: &[usize; 4],
    left_col: &[usize; 4],
    i: usize,
) -> (usize, usize) {
    let (row, col) = (i / 4, i % 4);
    let above_mode = match row {
        0 => above_col[col],
        _ => sub_modes[i - 4],
    };
    let left_mode = match col {
        0 => left_col[row],
        _ => sub_modes[i - 1],
    };
    (above_mode, left_mode)
}
811
812/// Writes the 16 `B_PRED` submodes, each tree-coded with its neighbor context (RFC 6386 §11.3).
813fn write_bmodes(
814    modes: &mut BoolEncoder,
815    sub_modes: &[usize; 16],
816    above_col: &[usize; 4],
817    left_col: &[usize; 4],
818) {
819    for i in 0..16 {
820        let (a, l) = bmode_context(sub_modes, above_col, left_col, i);
821        modes.put_tree(
822            prediction::BMODE_TREE,
823            &prediction::KF_BMODE_PROB[a][l],
824            sub_modes[i],
825        );
826    }
827}
828
829/// Reads the 16 `B_PRED` submodes, mirroring [`write_bmodes`].
830fn read_bmodes(
831    modes: &mut BoolDecoder,
832    above_col: &[usize; 4],
833    left_col: &[usize; 4],
834) -> [usize; 16] {
835    let mut sub_modes = [B_DC_PRED; 16];
836    for i in 0..16 {
837        let (a, l) = bmode_context(&sub_modes, above_col, left_col, i);
838        sub_modes[i] = modes.get_tree(prediction::BMODE_TREE, &prediction::KF_BMODE_PROB[a][l]);
839    }
840    sub_modes
841}
842
843/// The macroblock's bottom-row and right-column subblock modes, to seed the above/left context of the
844/// next row/column (RFC 6386 §11.3 caveat 4): the actual submodes for `B_PRED`, else the constant
845/// derived from the whole-block luma mode.
846fn bmode_propagation(
847    is_bpred: bool,
848    luma_mode: usize,
849    sub_modes: &[usize; 16],
850) -> ([usize; 4], [usize; 4]) {
851    if is_bpred {
852        (
853            [sub_modes[12], sub_modes[13], sub_modes[14], sub_modes[15]],
854            [sub_modes[3], sub_modes[7], sub_modes[11], sub_modes[15]],
855        )
856    } else {
857        let bm = prediction::bmode_for_luma(luma_mode);
858        ([bm; 4], [bm; 4])
859    }
860}
861
862/// Codes one macroblock's coefficient blocks in Y2 → Y → U → V order, threading the `above`/`left`
863/// non-zero context (RFC 6386 §13.3). A `B_PRED` macroblock has no Y2 block (its context persists)
864/// and codes luma with plane 3 (DC included); otherwise luma uses plane 0 (DC carried by Y2).
865fn encode_mb_tokens(
866    enc: &mut BoolEncoder,
867    above: &mut EntropyCtx,
868    left: &mut EntropyCtx,
869    probs: &CoeffProbs,
870    levels: &MbLevels,
871    is_bpred: bool,
872) {
873    if !is_bpred {
874        let ctx = usize::from(above.y2) + usize::from(left.y2);
875        let has = tokens::encode_block(enc, &levels.y2, 1, ctx, probs);
876        above.y2 = has;
877        left.y2 = has;
878    }
879    let plane = if is_bpred { 3 } else { 0 };
880    for i in 0..16 {
881        let (r, c) = (i / 4, i % 4);
882        let ctx = usize::from(above.y[c]) + usize::from(left.y[r]);
883        let has = tokens::encode_block(enc, &levels.y[i], plane, ctx, probs);
884        above.y[c] = has;
885        left.y[r] = has;
886    }
887    encode_chroma_tokens(enc, above, left, probs, levels);
888}
889
890/// Codes a macroblock's U then V chroma blocks (plane 2).
891fn encode_chroma_tokens(
892    enc: &mut BoolEncoder,
893    above: &mut EntropyCtx,
894    left: &mut EntropyCtx,
895    probs: &CoeffProbs,
896    levels: &MbLevels,
897) {
898    for (plane_levels, above_ctx, left_ctx) in [
899        (&levels.u, &mut above.u, &mut left.u),
900        (&levels.v, &mut above.v, &mut left.v),
901    ] {
902        for i in 0..4 {
903            let (r, c) = (i / 2, i % 2);
904            let ctx = usize::from(above_ctx[c]) + usize::from(left_ctx[r]);
905            let has = tokens::encode_block(enc, &plane_levels[i], 2, ctx, probs);
906            above_ctx[c] = has;
907            left_ctx[r] = has;
908        }
909    }
910}
911
912/// Decodes a macroblock's U then V chroma blocks into `levels`, mirroring [`encode_chroma_tokens`].
913fn decode_chroma_tokens(
914    dec: &mut BoolDecoder,
915    above: &mut EntropyCtx,
916    left: &mut EntropyCtx,
917    probs: &CoeffProbs,
918    levels: &mut MbLevels,
919) {
920    for (plane_levels, above_ctx, left_ctx) in [
921        (&mut levels.u, &mut above.u, &mut left.u),
922        (&mut levels.v, &mut above.v, &mut left.v),
923    ] {
924        for i in 0..4 {
925            let (r, c) = (i / 2, i % 2);
926            let ctx = usize::from(above_ctx[c]) + usize::from(left_ctx[r]);
927            let has = tokens::decode_block(dec, &mut plane_levels[i], 2, ctx, probs);
928            above_ctx[c] = has;
929            left_ctx[r] = has;
930        }
931    }
932}
933
934/// Decodes a whole-block (non-`B_PRED`) macroblock's coefficient blocks: Y2, 16 luma (plane 0), then
935/// chroma.
936fn decode_mb_tokens(
937    dec: &mut BoolDecoder,
938    above: &mut EntropyCtx,
939    left: &mut EntropyCtx,
940    probs: &CoeffProbs,
941) -> MbLevels {
942    let mut levels = MbLevels::default();
943    let ctx = usize::from(above.y2) + usize::from(left.y2);
944    let has = tokens::decode_block(dec, &mut levels.y2, 1, ctx, probs);
945    above.y2 = has;
946    left.y2 = has;
947    for i in 0..16 {
948        let (r, c) = (i / 4, i % 4);
949        let ctx = usize::from(above.y[c]) + usize::from(left.y[r]);
950        let has = tokens::decode_block(dec, &mut levels.y[i], 0, ctx, probs);
951        above.y[c] = has;
952        left.y[r] = has;
953    }
954    decode_chroma_tokens(dec, above, left, probs, &mut levels);
955    levels
956}
957
958/// Encodes a [`Yuv420`] image as a VP8 key-frame bitstream (the `VP8 ` chunk payload), returning the
959/// bitstream and the encoder's reconstruction (the tier-2 oracle: it must equal any decoder's output).
960/// Uses the normal loop filter.
961#[must_use]
962pub fn encode_frame(yuv: &Yuv420, quant_index: u8) -> (Vec<u8>, FrameBuffers) {
963    encode_frame_filtered(yuv, quant_index, EncodeOptions::default())
964}
965
966/// Encodes a frame with explicit [`EncodeOptions`] — the loop-filter type and whether to emit
967/// quantizer segments. [`encode_frame`] uses the defaults (normal filter, unsegmented). This lets the
968/// differential oracle drive the alternative encoder paths.
969#[must_use]
970pub fn encode_frame_filtered(
971    yuv: &Yuv420,
972    quant_index: u8,
973    opts: EncodeOptions,
974) -> (Vec<u8>, FrameBuffers) {
975    let mut header = frame_header(yuv.width(), yuv.height(), quant_index, opts.simple_filter);
976    if opts.segmented {
977        header.segmentation = Segmentation {
978            enabled: true,
979            update_map: true,
980            abs_delta: false,
981            quantizer: SEGMENT_QUANT_DELTAS,
982            filter_strength: [0; 4],
983            tree_probs: [128, 128, 128],
984        };
985    }
986    header.token_partitions = opts.partitions.max(1);
987    let n = header.token_partitions as usize;
988    let seg_qf = segment_quant_factors(&header);
989    let mut recon = FrameBuffers::new(yuv.width(), yuv.height());
990
991    let (yw, yh) = (recon.y_stride(), recon.mb_rows * 16);
992    let (cw, ch) = (recon.c_stride(), recon.mb_rows * 8);
993    let src_y = pad_plane(yuv.y(), yuv.width() as usize, yuv.height() as usize, yw, yh);
994    let vcw = Yuv420::chroma_width(yuv.width()) as usize;
995    let vch = Yuv420::chroma_height(yuv.height()) as usize;
996    let src_u = pad_plane(yuv.u(), vcw, vch, cw, ch);
997    let src_v = pad_plane(yuv.v(), vcw, vch, cw, ch);
998
999    let segment_map: Vec<usize> = (0..recon.mb_rows * recon.mb_cols)
1000        .map(|i| {
1001            if header.segmentation.enabled {
1002                let (mbx, mby) = (i % recon.mb_cols, i / recon.mb_cols);
1003                (mb_luma_mean(&src_y, yw, mbx, mby) / 64).min(3) as usize
1004            } else {
1005                0
1006            }
1007        })
1008        .collect();
1009
1010    let mut modes = BoolEncoder::new();
1011    header::write_frame_header(&mut modes, &header);
1012    let mut residuals: Vec<BoolEncoder> = (0..n).map(|_| BoolEncoder::new()).collect();
1013    let probs = &tokens::DEFAULT_COEFF_PROBS;
1014
1015    let mut above = vec![EntropyCtx::default(); recon.mb_cols];
1016    let mut above_bmodes = vec![[B_DC_PRED; 4]; recon.mb_cols];
1017    let mut filter_interior = vec![false; recon.mb_cols * recon.mb_rows];
1018    for mb_y in 0..recon.mb_rows {
1019        let mut left = EntropyCtx::default();
1020        let mut left_bmodes = [B_DC_PRED; 4];
1021        for mb_x in 0..recon.mb_cols {
1022            let segment = segment_map[mb_y * recon.mb_cols + mb_x];
1023            let qf = seg_qf[segment];
1024            let uv_mode = select_chroma_mode(&recon, &src_u, &src_v, cw, mb_x, mb_y);
1025            let u_pred = predict_chroma(&recon.u, recon.c_stride(), mb_x, mb_y, uv_mode);
1026            let v_pred = predict_chroma(&recon.v, recon.c_stride(), mb_x, mb_y, uv_mode);
1027
1028            // Whole-block luma candidate and its prediction SAD.
1029            let wb_mode = select_luma_mode(&recon, &src_y, yw, mb_x, mb_y);
1030            let wb_sad = block_sad(
1031                &predict_luma(&recon, mb_x, mb_y, wb_mode),
1032                &src_y,
1033                yw,
1034                mb_x,
1035                mb_y,
1036                16,
1037            );
1038
1039            // B_PRED candidate — scribbles its reconstruction into recon.y while selecting submodes.
1040            let above_right = above_right_source(&recon, mb_x, mb_y);
1041            let (sub_modes, bpred_levels, bpred_sad) =
1042                encode_bpred_luma(&mut recon, &src_y, yw, mb_x, mb_y, &qf, &above_right);
1043            let use_bpred = bpred_sad + BPRED_SAD_PENALTY < wb_sad;
1044
1045            let mut levels = MbLevels {
1046                u: quantize_chroma(&src_u, cw, mb_x, mb_y, &u_pred, &qf),
1047                v: quantize_chroma(&src_v, cw, mb_x, mb_y, &v_pred, &qf),
1048                ..Default::default()
1049            };
1050            // Compute the luma levels before writing modes so the skip flag — which precedes the luma
1051            // mode — reflects the whole macroblock. Whole-block luma is reconstructed afterward (B_PRED
1052            // was already reconstructed during submode selection).
1053            let wb_pred = (!use_bpred).then(|| predict_luma(&recon, mb_x, mb_y, wb_mode));
1054            if let Some(yp) = &wb_pred {
1055                quantize_luma(&src_y, yw, mb_x, mb_y, yp, &qf, &mut levels);
1056            } else {
1057                levels.y = bpred_levels;
1058            }
1059            let skip = !mb_has_coeffs(&levels);
1060            let y_mode = if use_bpred { B_PRED } else { wb_mode };
1061
1062            if header.segmentation.update_map {
1063                modes.put_tree(MB_SEGMENT_TREE, &header.segmentation.tree_probs, segment);
1064            }
1065            modes.put_bool(header.prob_skip_false, skip);
1066            modes.put_tree(
1067                prediction::KF_YMODE_TREE,
1068                &prediction::KF_YMODE_PROB,
1069                y_mode,
1070            );
1071            if use_bpred {
1072                write_bmodes(&mut modes, &sub_modes, &above_bmodes[mb_x], &left_bmodes);
1073            }
1074            modes.put_tree(
1075                prediction::KF_UV_MODE_TREE,
1076                &prediction::KF_UV_MODE_PROB,
1077                uv_mode,
1078            );
1079
1080            if let Some(yp) = &wb_pred {
1081                reconstruct_luma(&mut recon, mb_x, mb_y, yp, &levels, &qf);
1082            }
1083            let cstride = recon.c_stride();
1084            reconstruct_chroma(&mut recon.u, cstride, mb_x, mb_y, &u_pred, &levels.u, &qf);
1085            reconstruct_chroma(&mut recon.v, cstride, mb_x, mb_y, &v_pred, &levels.v, &qf);
1086
1087            filter_interior[mb_y * recon.mb_cols + mb_x] = use_bpred || mb_has_coeffs(&levels);
1088            if skip {
1089                clear_mb_context(&mut above[mb_x], &mut left, use_bpred);
1090            } else {
1091                encode_mb_tokens(
1092                    &mut residuals[mb_y % n],
1093                    &mut above[mb_x],
1094                    &mut left,
1095                    probs,
1096                    &levels,
1097                    use_bpred,
1098                );
1099            }
1100
1101            (above_bmodes[mb_x], left_bmodes) = bmode_propagation(use_bpred, wb_mode, &sub_modes);
1102        }
1103    }
1104
1105    apply_loop_filter(
1106        &mut recon,
1107        &header.loop_filter,
1108        &header.segmentation,
1109        &segment_map,
1110        &filter_interior,
1111    );
1112
1113    let part0 = modes.finish();
1114    let token_parts: Vec<Vec<u8>> = residuals.into_iter().map(BoolEncoder::finish).collect();
1115    let mut out = Vec::new();
1116    header::write_uncompressed_chunk(&header, part0.len() as u32, &mut out);
1117    out.extend_from_slice(&part0);
1118    // The first N-1 token-partition sizes are stored as 3-byte little-endian prefixes (§9.5); the
1119    // last partition's size is implied by the remainder.
1120    for part in &token_parts[..n - 1] {
1121        let len = part.len() as u32;
1122        out.extend_from_slice(&[len as u8, (len >> 8) as u8, (len >> 16) as u8]);
1123    }
1124    for part in &token_parts {
1125        out.extend_from_slice(part);
1126    }
1127    (out, recon)
1128}
1129
1130/// Splits the token-partition section (everything after the control partition) into `n` boolean
1131/// decoders (RFC 6386 §9.5): the first `n-1` partition sizes are 3-byte little-endian prefixes, the
1132/// last partition's size is the remainder.
1133fn split_token_partitions(data: &[u8], n: usize) -> Result<Vec<BoolDecoder<'_>>> {
1134    let sizes_len = (n - 1) * 3;
1135    if data.len() < sizes_len {
1136        return Err(Error::InvalidInput("VP8: token-partition sizes truncated"));
1137    }
1138    let mut decoders = Vec::with_capacity(n);
1139    let mut offset = sizes_len;
1140    for i in 0..n {
1141        let size = if i < n - 1 {
1142            let s = &data[i * 3..i * 3 + 3];
1143            usize::from(s[0]) | (usize::from(s[1]) << 8) | (usize::from(s[2]) << 16)
1144        } else {
1145            data.len() - offset
1146        };
1147        let end = offset
1148            .checked_add(size)
1149            .filter(|&e| e <= data.len())
1150            .ok_or(Error::InvalidInput("VP8: token partition exceeds frame"))?;
1151        decoders.push(BoolDecoder::new(&data[offset..end]));
1152        offset = end;
1153    }
1154    Ok(decoders)
1155}
1156
1157/// Decodes a VP8 key-frame bitstream (the `VP8 ` chunk payload) into reconstructed planes.
1158///
1159/// # Errors
1160///
1161/// Returns [`Error::InvalidInput`] for a malformed stream or [`Error::Unsupported`] for features not
1162/// yet implemented (per-macroblock loop-filter adjustments, …).
1163pub fn decode_frame(data: &[u8]) -> Result<FrameBuffers> {
1164    let chunk = header::read_uncompressed_chunk(data)?;
1165    if chunk.width == 0 || chunk.height == 0 {
1166        return Err(Error::InvalidInput("VP8: zero frame dimension"));
1167    }
1168    let part0_end = UNCOMPRESSED_CHUNK_LEN + chunk.first_partition_size as usize;
1169    if part0_end > data.len() {
1170        return Err(Error::InvalidInput("VP8: first partition exceeds frame"));
1171    }
1172    let mut modes = BoolDecoder::new(&data[UNCOMPRESSED_CHUNK_LEN..part0_end]);
1173    let (head, coeff_probs) = header::read_frame_header(&chunk, &mut modes)?;
1174    let seg_qf = segment_quant_factors(&head);
1175    let n = head.token_partitions as usize;
1176    let mut residuals = split_token_partitions(&data[part0_end..], n)?;
1177    let mut recon = FrameBuffers::new(u32::from(chunk.width), u32::from(chunk.height));
1178
1179    let mut above = vec![EntropyCtx::default(); recon.mb_cols];
1180    let mut above_bmodes = vec![[B_DC_PRED; 4]; recon.mb_cols];
1181    let mut filter_interior = vec![false; recon.mb_cols * recon.mb_rows];
1182    let mut segment_map = vec![0usize; recon.mb_cols * recon.mb_rows];
1183    for mb_y in 0..recon.mb_rows {
1184        let mut left = EntropyCtx::default();
1185        let mut left_bmodes = [B_DC_PRED; 4];
1186        for mb_x in 0..recon.mb_cols {
1187            let segment = if head.segmentation.update_map {
1188                modes.get_tree(MB_SEGMENT_TREE, &head.segmentation.tree_probs)
1189            } else {
1190                0
1191            };
1192            segment_map[mb_y * recon.mb_cols + mb_x] = segment;
1193            let qf = seg_qf[segment];
1194            let skip = head.mb_no_skip_coeff && modes.get_bool(head.prob_skip_false);
1195            let y_mode = modes.get_tree(prediction::KF_YMODE_TREE, &prediction::KF_YMODE_PROB);
1196            let is_bpred = y_mode == B_PRED;
1197            let sub_modes = if is_bpred {
1198                read_bmodes(&mut modes, &above_bmodes[mb_x], &left_bmodes)
1199            } else {
1200                [B_DC_PRED; 16]
1201            };
1202            let uv_mode = modes.get_tree(prediction::KF_UV_MODE_TREE, &prediction::KF_UV_MODE_PROB);
1203            let u_pred = predict_chroma(&recon.u, recon.c_stride(), mb_x, mb_y, uv_mode);
1204            let v_pred = predict_chroma(&recon.v, recon.c_stride(), mb_x, mb_y, uv_mode);
1205            let cstride = recon.c_stride();
1206
1207            // A skipped macroblock has no coefficients: its residual is zero (the reconstruction is the
1208            // prediction) and no tokens are read.
1209            let mut levels = MbLevels::default();
1210            if is_bpred {
1211                let above_right = above_right_source(&recon, mb_x, mb_y);
1212                if skip {
1213                    reconstruct_bpred_zero(&mut recon, mb_x, mb_y, &sub_modes, &above_right);
1214                } else {
1215                    decode_bpred_luma(
1216                        &mut recon,
1217                        &mut residuals[mb_y % n],
1218                        &mut above[mb_x],
1219                        &mut left,
1220                        &coeff_probs,
1221                        mb_x,
1222                        mb_y,
1223                        &qf,
1224                        &sub_modes,
1225                        &above_right,
1226                    );
1227                    decode_chroma_tokens(
1228                        &mut residuals[mb_y % n],
1229                        &mut above[mb_x],
1230                        &mut left,
1231                        &coeff_probs,
1232                        &mut levels,
1233                    );
1234                }
1235                reconstruct_chroma(&mut recon.u, cstride, mb_x, mb_y, &u_pred, &levels.u, &qf);
1236                reconstruct_chroma(&mut recon.v, cstride, mb_x, mb_y, &v_pred, &levels.v, &qf);
1237                filter_interior[mb_y * recon.mb_cols + mb_x] = true; // B_PRED always filters interiors
1238            } else {
1239                let y_pred = predict_luma(&recon, mb_x, mb_y, y_mode);
1240                if !skip {
1241                    levels = decode_mb_tokens(
1242                        &mut residuals[mb_y % n],
1243                        &mut above[mb_x],
1244                        &mut left,
1245                        &coeff_probs,
1246                    );
1247                }
1248                reconstruct_luma(&mut recon, mb_x, mb_y, &y_pred, &levels, &qf);
1249                reconstruct_chroma(&mut recon.u, cstride, mb_x, mb_y, &u_pred, &levels.u, &qf);
1250                reconstruct_chroma(&mut recon.v, cstride, mb_x, mb_y, &v_pred, &levels.v, &qf);
1251                filter_interior[mb_y * recon.mb_cols + mb_x] = mb_has_coeffs(&levels);
1252            }
1253            if skip {
1254                clear_mb_context(&mut above[mb_x], &mut left, is_bpred);
1255            }
1256
1257            (above_bmodes[mb_x], left_bmodes) = bmode_propagation(is_bpred, y_mode, &sub_modes);
1258        }
1259    }
1260
1261    apply_loop_filter(
1262        &mut recon,
1263        &head.loop_filter,
1264        &head.segmentation,
1265        &segment_map,
1266        &filter_interior,
1267    );
1268    Ok(recon)
1269}
1270
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Yuv420` from a deterministic synthetic pattern.
    fn pattern(width: u32, height: u32) -> Yuv420 {
        let (w, h) = (width as usize, height as usize);
        let (cw, ch) = (
            Yuv420::chroma_width(width) as usize,
            Yuv420::chroma_height(height) as usize,
        );
        let y = (0..w * h)
            .map(|i| ((i * 7 + i / w * 13) & 0xff) as u8)
            .collect();
        let u = (0..cw * ch).map(|i| ((i * 5 + 64) & 0xff) as u8).collect();
        let v = (0..cw * ch)
            .map(|i| ((i * 11 + 128) & 0xff) as u8)
            .collect();
        Yuv420::new(width, height, y, u, v).unwrap()
    }

    /// Builds B_PRED-favorable content: each 4×4 region carries a different gradient direction, so a
    /// single whole-block mode predicts the macroblock poorly but per-subblock modes do not.
    fn detailed(width: u32, height: u32) -> Yuv420 {
        let (w, h) = (width as usize, height as usize);
        let (cw, ch) = (
            Yuv420::chroma_width(width) as usize,
            Yuv420::chroma_height(height) as usize,
        );
        let y = (0..w * h)
            .map(|i| {
                let (x, yy) = (i % w, i / w);
                let v = match (x / 4 + yy / 4) % 4 {
                    0 => x * 18,
                    1 => yy * 18,
                    2 => (x + yy) * 18,
                    _ => x.wrapping_sub(yy).wrapping_mul(18),
                };
                (v & 0xff) as u8
            })
            .collect();
        let u = (0..cw * ch).map(|i| ((i * 3) & 0xff) as u8).collect();
        let v = (0..cw * ch).map(|i| ((i * 9 + 70) & 0xff) as u8).collect();
        Yuv420::new(width, height, y, u, v).unwrap()
    }

    /// Re-reads partition 0 (modes) and returns `(B_PRED macroblocks, skipped macroblocks)`, to
    /// confirm those paths are genuinely exercised (not merely available).
    fn mode_stats(data: &[u8]) -> (usize, usize) {
        let chunk = header::read_uncompressed_chunk(data).unwrap();
        let part0_end = UNCOMPRESSED_CHUNK_LEN + chunk.first_partition_size as usize;
        let mut modes = BoolDecoder::new(&data[UNCOMPRESSED_CHUNK_LEN..part0_end]);
        let (head, _) = header::read_frame_header(&chunk, &mut modes).unwrap();
        let mb_cols = (chunk.width as usize).div_ceil(16);
        let mb_rows = (chunk.height as usize).div_ceil(16);
        let mut above_bmodes = vec![[B_DC_PRED; 4]; mb_cols];
        let (mut bpred, mut skipped) = (0, 0);
        for _ in 0..mb_rows {
            let mut left_bmodes = [B_DC_PRED; 4];
            for mb_x in 0..mb_cols {
                // Mirror decode_frame's partition-0 read order so the bool decoder stays in sync.
                if head.segmentation.update_map {
                    let _ = modes.get_tree(MB_SEGMENT_TREE, &head.segmentation.tree_probs);
                }
                if head.mb_no_skip_coeff && modes.get_bool(head.prob_skip_false) {
                    skipped += 1;
                }
                let y_mode = modes.get_tree(prediction::KF_YMODE_TREE, &prediction::KF_YMODE_PROB);
                let is_bpred = y_mode == B_PRED;
                let sub_modes = if is_bpred {
                    bpred += 1;
                    read_bmodes(&mut modes, &above_bmodes[mb_x], &left_bmodes)
                } else {
                    [B_DC_PRED; 16]
                };
                let _ = modes.get_tree(prediction::KF_UV_MODE_TREE, &prediction::KF_UV_MODE_PROB);
                (above_bmodes[mb_x], left_bmodes) = bmode_propagation(is_bpred, y_mode, &sub_modes);
            }
        }
        (bpred, skipped)
    }

    #[test]
    fn bpred_is_exercised_and_bit_exact() {
        let yuv = detailed(48, 48);
        let (bitstream, recon) = encode_frame(&yuv, 8);
        assert!(
            mode_stats(&bitstream).0 > 0,
            "detailed content should select B_PRED for some macroblocks"
        );
        let decoded = decode_frame(&bitstream).expect("decode");
        let (enc, dec) = (recon.to_yuv420(), decoded.to_yuv420());
        assert_eq!(enc.y(), dec.y(), "B_PRED luma mismatch");
        assert_eq!(enc.u(), dec.u(), "B_PRED u mismatch");
        assert_eq!(enc.v(), dec.v(), "B_PRED v mismatch");
    }

    #[test]
    fn mb_skip_is_exercised_and_bit_exact() {
        // A flat image predicts to 128 with a zero residual, so every macroblock is skipped; the
        // decode must reproduce it from the skip flags alone.
        let (w, h) = (48u32, 48u32);
        let (cw, ch) = (
            Yuv420::chroma_width(w) as usize,
            Yuv420::chroma_height(h) as usize,
        );
        let yuv = Yuv420::new(
            w,
            h,
            vec![128u8; (w * h) as usize],
            vec![128u8; cw * ch],
            vec![128u8; cw * ch],
        )
        .unwrap();
        let (bits, recon) = encode_frame(&yuv, 60);
        assert!(
            mode_stats(&bits).1 > 0,
            "flat content should skip macroblocks"
        );
        let decoded = decode_frame(&bits).expect("decode");
        let (enc, dec) = (recon.to_yuv420(), decoded.to_yuv420());
        assert_eq!(enc.y(), dec.y());
        assert_eq!(enc.u(), dec.u());
        assert_eq!(enc.v(), dec.v());
    }

    /// Tier-2: the encoder's reconstruction must equal the native decoder's output, bit-for-bit.
    fn assert_encoder_recon_matches_decoder(width: u32, height: u32, q: u8) {
        let yuv = pattern(width, height);
        let (bitstream, recon) = encode_frame(&yuv, q);
        let decoded = decode_frame(&bitstream).expect("decode");
        let enc = recon.to_yuv420();
        let dec = decoded.to_yuv420();
        assert_eq!(enc.y(), dec.y(), "luma mismatch at {width}x{height} q{q}");
        assert_eq!(enc.u(), dec.u(), "u mismatch at {width}x{height} q{q}");
        assert_eq!(enc.v(), dec.v(), "v mismatch at {width}x{height} q{q}");
    }

    #[test]
    fn encoder_recon_matches_decoder_across_sizes_and_quant() {
        for &(w, h) in &[
            (16u32, 16u32),
            (32, 16),
            (17, 9),
            (1, 1),
            (64, 48),
            (33, 41),
        ] {
            for &q in &[0u8, 10, 40, 80, 127] {
                assert_encoder_recon_matches_decoder(w, h, q);
            }
        }
    }

    #[test]
    fn both_loop_filters_reconstruct_bit_exact() {
        // The simple (luma-only) and normal (luma+chroma) filters must each reconstruct identically
        // in the encoder and decoder — exercising both decoder filter paths on coefficient-bearing
        // content (so interior edges are filtered too).
        for simple in [true, false] {
            for &q in &[20u8, 60, 110] {
                let yuv = detailed(48, 32);
                let opts = EncodeOptions {
                    simple_filter: simple,
                    segmented: false,
                    partitions: 1,
                };
                let (bits, recon) = encode_frame_filtered(&yuv, q, opts);
                let dec = decode_frame(&bits).expect("decode");
                let (enc, dec) = (recon.to_yuv420(), dec.to_yuv420());
                assert_eq!(enc.y(), dec.y(), "luma simple={simple} q{q}");
                assert_eq!(enc.u(), dec.u(), "u simple={simple} q{q}");
                assert_eq!(enc.v(), dec.v(), "v simple={simple} q{q}");
            }
        }
    }

    #[test]
    fn segmentation_round_trips_bit_exact() {
        // Four quantizer segments (assigned by macroblock luma mean) must reconstruct identically in
        // the encoder and the decoder across a range of base quantizers.
        for &q in &[10u8, 40, 90] {
            let yuv = detailed(64, 48);
            let opts = EncodeOptions {
                simple_filter: false,
                segmented: true,
                partitions: 1,
            };
            let (bits, recon) = encode_frame_filtered(&yuv, q, opts);
            let dec = decode_frame(&bits).expect("decode");
            let (enc, dec) = (recon.to_yuv420(), dec.to_yuv420());
            assert_eq!(enc.y(), dec.y(), "luma q{q}");
            assert_eq!(enc.u(), dec.u(), "u q{q}");
            assert_eq!(enc.v(), dec.v(), "v q{q}");
        }
    }

    #[test]
    fn token_partitions_round_trip_bit_exact() {
        // 1/2/4/8 token partitions must each reconstruct identically; a tall image routes macroblock
        // rows across all eight partitions.
        for partitions in [1u8, 2, 4, 8] {
            let yuv = detailed(32, 160);
            let opts = EncodeOptions {
                simple_filter: false,
                segmented: false,
                partitions,
            };
            let (bits, recon) = encode_frame_filtered(&yuv, 30, opts);
            let dec = decode_frame(&bits).expect("decode");
            let (enc, dec) = (recon.to_yuv420(), dec.to_yuv420());
            assert_eq!(enc.y(), dec.y(), "luma p{partitions}");
            assert_eq!(enc.u(), dec.u(), "u p{partitions}");
            assert_eq!(enc.v(), dec.v(), "v p{partitions}");
        }
    }

    #[test]
    fn decode_rejects_truncated_first_partition() {
        let yuv = pattern(16, 16);
        let (mut bitstream, _) = encode_frame(&yuv, 40);
        // Keep the uncompressed chunk plus a single byte: the declared first-partition size now
        // overruns the data, which must surface as an error rather than a panic or a bogus frame.
        bitstream.truncate(UNCOMPRESSED_CHUNK_LEN + 1);
        assert!(
            matches!(decode_frame(&bitstream), Err(Error::InvalidInput(_))),
            "a truncated first partition must be rejected as invalid input"
        );
    }
}