Skip to main content

oximedia_codec/webp/
encoder.rs

1//! VP8 lossy encoder for WebP.
2//!
3//! This module provides a simplified VP8 encoder that produces valid VP8 keyframe
4//! bitstreams suitable for embedding in a WebP container. The encoder implements:
5//!
6//! - RGB to YUV 4:2:0 color space conversion (BT.601)
7//! - 16x16 macroblock processing with DC intra prediction
8//! - Forward 4x4 DCT transform
9//! - Coefficient quantization with quality-based QP mapping
10//! - Boolean arithmetic coding (VP8 range coder)
11//! - VP8 keyframe bitstream assembly per RFC 6386
12//!
13//! # Limitations
14//!
15//! - Only generates keyframes (no inter prediction / P-frames)
16//! - Uses DC prediction mode exclusively (simplest intra prediction)
17//! - Single DCT partition (no multi-partition)
18//! - No rate-distortion optimization
19//!
20//! # References
21//!
22//! - [RFC 6386: VP8 Data Format and Decoding Guide](https://tools.ietf.org/html/rfc6386)
23
24#![allow(clippy::cast_possible_truncation)]
25#![allow(clippy::cast_sign_loss)]
26#![allow(clippy::cast_possible_wrap)]
27
28use crate::error::{CodecError, CodecResult};
29
30// ---------------------------------------------------------------------------
31// VP8 default token probability tables (RFC 6386 Section 13.4)
32// ---------------------------------------------------------------------------
33
/// Default token probabilities used when coding DCT coefficients.
///
/// Indexed as `[block_type][coeff_band][prev_coeff_ctx][token_node]`:
/// - `block_type`: 4 entries
/// - `coeff_band`: 8 entries (see `COEFF_BANDS`)
/// - `prev_coeff_ctx`: 3 entries (0 = previous token was zero, 1 = it was
///   +/-1, 2 = it was larger)
/// - `token_node`: 11 entries, one per interior node of the token tree
///
/// NOTE(review): in this table every band of block type 3 except band 0 is a
/// copy of a block-type-0 band (type 3 bands 1..=4 equal type 0 bands 3..=6,
/// and type 3 bands 5..=7 all equal type 0 band 7). That looks like
/// placeholder data rather than the RFC 6386 `default_coeff_probs` table.
/// The block-type labels below also appear to differ from the RFC, which —
/// as far as I recall — uses 0 = Y after Y2, 1 = Y2, 2 = chroma, 3 = Y with
/// DC. Verify the whole table against the RFC before relying on it: a
/// decoder uses the RFC defaults, so any mismatch corrupts the stream.
#[rustfmt::skip]
static DEFAULT_COEFF_PROBS: [[[[u8; 11]; 3]; 8]; 4] = [
    // Block type 0: DC component of Y after Y2
    [
        // band 0
        [[128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128],
         [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128],
         [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
        // band 1
        [[253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128],
         [189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128],
         [106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128]],
        // band 2
        [[  1,  98, 248, 255, 236, 226, 255, 255, 128, 128, 128],
         [181, 133, 238, 254, 211, 236, 255, 255, 128, 128, 128],
         [ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128]],
        // band 3
        [[  1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128],
         [184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128],
         [ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128]],
        // band 4
        [[  1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128],
         [170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128],
         [ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128]],
        // band 5
        [[  1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128],
         [207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128],
         [102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128]],
        // band 6
        [[  1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128],
         [177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128],
         [ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128]],
        // band 7
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
    ],
    // Block type 1: AC coefficients of Y
    [
        // band 0
        [[198,  35, 237, 223, 193, 187, 162, 160, 145, 155,  62],
         [131,  45, 198, 221, 172, 176, 220, 157, 252, 221,   1],
         [ 68,  47, 146, 208, 149, 167, 221, 162, 255, 223, 128]],
        // band 1
        [[  1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128],
         [184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128],
         [ 81,  99, 181, 242, 195, 203, 255, 219, 128, 128, 128]],
        // band 2
        [[  1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128],
         [132, 109, 223, 253, 214, 175, 255, 236, 128, 128, 128],
         [ 68, 104, 184, 246, 171, 175, 255, 236, 128, 128, 128]],
        // band 3
        [[  1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128],
         [195, 148, 244, 255, 236, 203, 128, 128, 128, 128, 128],
         [ 39, 130, 228, 255, 223, 255, 128, 128, 128, 128, 128]],
        // band 4
        [[  1, 107, 238, 254, 198, 218, 255, 191, 128, 128, 128],
         [188, 133, 238, 253, 233, 181, 128, 128, 128, 128, 128],
         [ 36, 142, 199, 247, 175, 230, 255, 255, 128, 128, 128]],
        // band 5
        [[  1, 238, 251, 255, 210, 128, 128, 128, 128, 128, 128],
         [190, 171, 253, 255, 249, 128, 128, 128, 128, 128, 128],
         [ 61, 104, 231, 255, 235, 128, 128, 128, 128, 128, 128]],
        // band 6
        [[  1, 210, 247, 255, 255, 128, 128, 128, 128, 128, 128],
         [164, 154, 246, 255, 249, 128, 128, 128, 128, 128, 128],
         [ 29, 145, 228, 255, 220, 128, 128, 128, 128, 128, 128]],
        // band 7
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [218,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
    ],
    // Block type 2: DC/AC of UV
    [
        // band 0
        [[  1, 108, 226, 255, 227, 187, 128, 128, 128, 128, 128],
         [117, 109, 203, 246, 197, 174, 255, 255, 128, 128, 128],
         [ 15,  66, 128, 224, 149, 147, 255, 255, 128, 128, 128]],
        // band 1
        [[  1,  59, 220, 255, 205, 206, 128, 128, 128, 128, 128],
         [138,  40, 218, 255, 237, 219, 255, 255, 128, 128, 128],
         [ 31,  27, 156, 248, 188, 175, 255, 255, 128, 128, 128]],
        // band 2
        [[  1, 112, 230, 250, 199, 191, 255, 255, 128, 128, 128],
         [116, 109, 225, 252, 198, 190, 255, 255, 128, 128, 128],
         [ 41,  82, 163, 237, 156, 172, 255, 255, 128, 128, 128]],
        // band 3
        [[  1,  74, 254, 255, 227, 128, 128, 128, 128, 128, 128],
         [150, 101, 247, 255, 222, 128, 128, 128, 128, 128, 128],
         [ 57,  56, 231, 255, 243, 128, 128, 128, 128, 128, 128]],
        // band 4
        [[  1, 179, 255, 255, 128, 128, 128, 128, 128, 128, 128],
         [176, 134, 243, 255, 228, 128, 128, 128, 128, 128, 128],
         [ 80,  84, 234, 255, 210, 128, 128, 128, 128, 128, 128]],
        // band 5
        [[  1, 253, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [185, 205, 255, 255, 128, 128, 128, 128, 128, 128, 128],
         [141, 124, 248, 255, 128, 128, 128, 128, 128, 128, 128]],
        // band 6
        [[  1, 254, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [187, 252, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [175, 138, 254, 254, 128, 128, 128, 128, 128, 128, 128]],
        // band 7
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [239,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
    ],
    // Block type 3: Y2 (DC of 16x16 luma)
    [
        // band 0
        [[  1, 202, 254, 255, 245, 255, 128, 128, 128, 128, 128],
         [248, 136, 248, 254, 227, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
        // band 1
        [[  1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128],
         [184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128],
         [ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128]],
        // band 2
        [[  1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128],
         [170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128],
         [ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128]],
        // band 3
        [[  1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128],
         [207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128],
         [102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128]],
        // band 4
        [[  1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128],
         [177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128],
         [ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128]],
        // band 5
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
        // band 6
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
        // band 7
        [[  1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128],
         [255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]],
    ],
];
155
/// DC quantizer step sizes, indexed by the VP8 quantizer index.
///
/// The table has 128 entries (indices 0 through 127); steps rise
/// monotonically from 4 to 157. Tabulated in RFC 6386 (dequantization).
#[rustfmt::skip]
static DC_QUANT_TABLE: [i32; 128] = [
    // indices 0..=31
      4,   5,   6,   7,   8,   9,  10,  10,
     11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,
     23,  23,  24,  25,  25,  26,  27,  28,
    // indices 32..=63
     29,  30,  31,  32,  33,  34,  35,  36,
     37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,
     51,  52,  53,  54,  55,  56,  57,  58,
    // indices 64..=95
     59,  60,  61,  62,  63,  64,  65,  66,
     67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,
     82,  83,  84,  85,  86,  87,  88,  89,
    // indices 96..=127
     91,  93,  95,  96,  98, 100, 101, 102,
    104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136,
    138, 140, 143, 145, 148, 151, 154, 157,
];
170
/// AC quantizer step sizes, indexed by the VP8 quantizer index.
///
/// The table has 128 entries (indices 0 through 127); steps rise strictly
/// from 4 to 284. Tabulated in RFC 6386 (dequantization).
#[rustfmt::skip]
static AC_QUANT_TABLE: [i32; 128] = [
    // indices 0..=31
      4,   5,   6,   7,   8,   9,  10,  11,
     12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,
     28,  29,  30,  31,  32,  33,  34,  35,
    // indices 32..=63
     36,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,
     62,  64,  66,  68,  70,  72,  74,  76,
    // indices 64..=95
     78,  80,  82,  84,  86,  88,  90,  92,
     94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128,
    131, 134, 137, 140, 143, 146, 149, 152,
    // indices 96..=127
    155, 158, 161, 164, 167, 170, 173, 177,
    181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245,
    249, 254, 259, 264, 269, 274, 279, 284,
];
185
/// Zigzag scan order for a 4x4 block.
///
/// `ZIGZAG_ORDER[i]` is the raster index (`row * 4 + col`) of the `i`-th
/// coefficient visited by the scan.
#[rustfmt::skip]
static ZIGZAG_ORDER: [usize; 16] = [
     0,  1,  4,  8,
     5,  2,  3,  6,
     9, 12, 13, 10,
     7, 11, 14, 15,
];
188
/// Probability band for each position of the zigzag scan.
///
/// `COEFF_BANDS[i]` is the band (0 through 7) used to select token
/// probabilities for the `i`-th coefficient in scan order.
#[rustfmt::skip]
static COEFF_BANDS: [usize; 16] = [
    0, 1, 2, 3,
    6, 4, 5, 6,
    6, 6, 6, 6,
    6, 6, 6, 7,
];
193
194// ---------------------------------------------------------------------------
195// Boolean arithmetic encoder (VP8 range coder)
196// ---------------------------------------------------------------------------
197
/// Boolean arithmetic encoder for VP8 bitstream writing.
///
/// This follows the reference boolean encoder of RFC 6386 Section 7: the
/// coder tracks the current interval as a `bottom` / `range` pair, shifts
/// one bit at a time as the range narrows, emits a byte every 8 shifts and
/// propagates carries into bytes that were already written.
struct BoolEncoder {
    /// Bytes emitted so far.
    output: Vec<u8>,
    /// Width of the current coding interval; 128..=255 between symbols.
    range: u32,
    /// Low end of the coding interval. Only the low 32 bits are used; the
    /// byte to emit next accumulates in bits 24..=31.
    bottom: u64,
    /// Number of shifts remaining before the next byte is complete.
    bits_left: i32,
}

impl BoolEncoder {
    /// Mask keeping `bottom` inside its 32-bit working window.
    const WINDOW_MASK: u64 = 0xFFFF_FFFF;
    /// Bit that is only ever set by a carry out of the pending data, checked
    /// just before it would be shifted out of the window.
    const CARRY_BIT: u64 = 1 << 31;

    /// Creates a new boolean encoder with an empty output buffer.
    fn new() -> Self {
        Self {
            output: Vec::new(),
            range: 255,
            bottom: 0,
            // The first byte is complete after 24 shifts.
            bits_left: 24,
        }
    }

    /// Encodes a single boolean symbol with the given probability.
    ///
    /// `prob` is the probability that the symbol is **false** (0),
    /// expressed in 1/256 units, in the range 1..=255.
    fn encode_bool(&mut self, value: bool, prob: u8) {
        let split = 1 + (((self.range - 1) * u32::from(prob)) >> 8);

        if value {
            // Upper sub-interval: move the bottom up, shrink the range.
            self.bottom += u64::from(split);
            self.range -= split;
        } else {
            // Lower sub-interval: bottom stays, range becomes the split.
            self.range = split;
        }

        // Renormalize one bit at a time so carries and byte boundaries are
        // handled exactly where they occur.
        while self.range < 128 {
            self.range <<= 1;

            if self.bottom & Self::CARRY_BIT != 0 {
                self.propagate_carry();
            }
            self.bottom = (self.bottom << 1) & Self::WINDOW_MASK;

            self.bits_left -= 1;
            if self.bits_left <= 0 {
                self.flush_bits();
            }
        }
    }

    /// Encodes a boolean with 50% probability (uniform bit).
    fn encode_bit(&mut self, value: bool) {
        self.encode_bool(value, 128);
    }

    /// Encodes the low `n` bits of `value` as uniform bits, MSB first.
    fn encode_literal(&mut self, value: u32, n: u8) {
        for i in (0..n).rev() {
            let bit = (value >> i) & 1 != 0;
            self.encode_bit(bit);
        }
    }

    /// Encodes the low `n` bits of `value`, MSB first, coding every bit
    /// with the same probability `prob`.
    fn encode_literal_with_prob(&mut self, value: u32, n: u8, prob: u8) {
        for i in (0..n).rev() {
            let bit = (value >> i) & 1 != 0;
            self.encode_bool(bit, prob);
        }
    }

    /// Moves the completed high byte of `bottom` into the output.
    ///
    /// Does nothing unless a full byte is pending (`bits_left <= 0`). The
    /// low 24 bits stay in place: they are the start of the following bytes.
    fn flush_bits(&mut self) {
        while self.bits_left <= 0 {
            self.output.push((self.bottom >> 24) as u8);
            self.bottom &= 0x00FF_FFFF;
            self.bits_left += 8;
        }
    }

    /// Adds one to the already-emitted byte string, rippling through any
    /// trailing `0xFF` bytes.
    fn propagate_carry(&mut self) {
        for byte in self.output.iter_mut().rev() {
            if *byte == 0xFF {
                *byte = 0;
            } else {
                *byte += 1;
                break;
            }
        }
    }

    /// Finalizes the encoder and returns the encoded byte stream.
    ///
    /// Resolves a pending carry, left-aligns the bits still held in `bottom`
    /// and writes them out as four bytes (the tail is zero padding).
    fn flush(mut self) -> Vec<u8> {
        // `bits_left` is 1..=24 whenever no symbol is being encoded.
        let pending = self.bits_left.clamp(1, 24) as u32;

        // With `pending` shifts outstanding, a carry sits right above the
        // data, at bit `32 - pending`.
        if self.bottom & (1u64 << (32 - pending)) != 0 {
            self.propagate_carry();
        }

        let mut tail = (self.bottom << pending) & Self::WINDOW_MASK;
        for _ in 0..4 {
            self.output.push((tail >> 24) as u8);
            tail = (tail << 8) & Self::WINDOW_MASK;
        }
        self.output
    }
}
292
293// ---------------------------------------------------------------------------
294// Forward DCT (4x4)
295// ---------------------------------------------------------------------------
296
/// Performs a 1D forward 4-point DCT with a gain of 2.
///
/// This is the forward counterpart of the 4-point inverse DCT that VP8
/// decoders apply (RFC 6386 Section 14). The odd outputs use the fixed-point
/// multipliers `5352/4096 ~= sqrt(2)*cos(pi/8)` and
/// `2217/4096 ~= sqrt(2)*sin(pi/8)`:
///
/// - `output[1] = (x0 - x3) * 5352/4096 + (x1 - x2) * 2217/4096`
/// - `output[3] = (x0 - x3) * 2217/4096 - (x1 - x2) * 5352/4096`
///
/// Products are rounded to nearest (`+2048` before `>> 12`), so an all-zero
/// input produces an all-zero output.
fn fdct4_1d(input: &[i32; 4], output: &mut [i32; 4]) {
    let a0 = input[0] + input[3];
    let a1 = input[1] + input[2];
    let a2 = input[1] - input[2];
    let a3 = input[0] - input[3];

    output[0] = a0 + a1;
    output[2] = a0 - a1;

    // The outer difference (x0 - x3) carries the larger weight in the
    // lowest-frequency odd basis function, the inner one in the highest.
    output[1] = (a3 * 5352 + a2 * 2217 + 2048) >> 12;
    output[3] = (a3 * 2217 - a2 * 5352 + 2048) >> 12;
}

/// Performs a 2D forward 4x4 DCT on a residual block.
///
/// Takes 16 residual values (in raster order) and produces 16 DCT
/// coefficients (in raster order). The overall gain is 2: a flat block of
/// value `c` yields a DC coefficient of `8 * c` and zero AC coefficients.
///
/// The residual is pre-scaled by 8 before the row pass so that the rounding
/// inside the two 1D passes happens at sub-integer precision; the scale is
/// removed, together with one factor of 2, by the final rounded `>> 4`.
fn fdct4x4(residual: &[i32; 16], coeffs: &mut [i32; 16]) {
    let mut temp = [0i32; 16];

    // Row transform on the pre-scaled residual.
    for row in 0..4 {
        let base = row * 4;
        let input = [
            residual[base] * 8,
            residual[base + 1] * 8,
            residual[base + 2] * 8,
            residual[base + 3] * 8,
        ];
        let mut out = [0i32; 4];
        fdct4_1d(&input, &mut out);
        temp[base..base + 4].copy_from_slice(&out);
    }

    // Column transform, then drop the pre-scale with rounding.
    for col in 0..4 {
        let input = [temp[col], temp[col + 4], temp[col + 8], temp[col + 12]];
        let mut out = [0i32; 4];
        fdct4_1d(&input, &mut out);
        for (k, &value) in out.iter().enumerate() {
            coeffs[col + 4 * k] = (value + 8) >> 4;
        }
    }
}
351
/// Forward 4x4 Walsh-Hadamard Transform for the luma DC coefficients.
///
/// Takes the 16 DC values of a macroblock's 4x4 grid of luma sub-blocks
/// (raster order) and produces the 16 Y2 coefficients (raster order).
///
/// The butterfly matrix `H` used here satisfies `H * H = 4 * I`, so applying
/// it in both dimensions and then again in the decoder multiplies by 16. The
/// VP8 inverse WHT (RFC 6386 Section 14) divides by 8, so the forward
/// transform has to supply the remaining factor of 2: every output is halved
/// (`>> 1`) so that decoding reproduces the DC values at their original scale.
fn fwht4x4(dc_values: &[i32; 16], coeffs: &mut [i32; 16]) {
    let mut temp = [0i32; 16];

    // Row transform
    for row in 0..4 {
        let base = row * 4;
        let a = dc_values[base] + dc_values[base + 3];
        let b = dc_values[base + 1] + dc_values[base + 2];
        let c = dc_values[base + 1] - dc_values[base + 2];
        let d = dc_values[base] - dc_values[base + 3];

        temp[base] = a + b;
        temp[base + 1] = d + c;
        temp[base + 2] = a - b;
        temp[base + 3] = d - c;
    }

    // Column transform with the 1/2 normalization folded in.
    for col in 0..4 {
        let a = temp[col] + temp[col + 12];
        let b = temp[col + 4] + temp[col + 8];
        let c = temp[col + 4] - temp[col + 8];
        let d = temp[col] - temp[col + 12];

        coeffs[col] = (a + b) >> 1;
        coeffs[col + 4] = (d + c) >> 1;
        coeffs[col + 8] = (a - b) >> 1;
        coeffs[col + 12] = (d - c) >> 1;
    }
}
386
387// ---------------------------------------------------------------------------
388// YUV plane representation
389// ---------------------------------------------------------------------------
390
/// Planar YUV 4:2:0 image whose planes are padded to whole macroblocks.
struct YuvPlanes {
    /// Luma samples, `y_stride` bytes per row.
    y: Vec<u8>,
    /// U (Cb) chroma samples at half resolution, `uv_stride` bytes per row.
    u: Vec<u8>,
    /// V (Cr) chroma samples at half resolution, `uv_stride` bytes per row.
    v: Vec<u8>,
    /// Row pitch of the luma plane: the width rounded up to a multiple of 16.
    y_stride: usize,
    /// Row pitch of each chroma plane: half of `y_stride`.
    uv_stride: usize,
    /// Width of the source image in pixels, before padding.
    width: u32,
    /// Height of the source image in pixels, before padding.
    height: u32,
}
401
402/// Converts RGB data to YUV 4:2:0 using BT.601 coefficients.
403///
404/// The RGB buffer must contain `width * height * 3` bytes in row-major
405/// R-G-B order.  The output planes are padded so that the luma plane
406/// width/height are multiples of 16 (macroblock alignment).
407fn rgb_to_yuv420(data: &[u8], width: u32, height: u32) -> CodecResult<YuvPlanes> {
408    let w = width as usize;
409    let h = height as usize;
410
411    if data.len() < w * h * 3 {
412        return Err(CodecError::InvalidParameter(format!(
413            "RGB data too short: need {}, have {}",
414            w * h * 3,
415            data.len()
416        )));
417    }
418
419    // Pad to macroblock boundaries
420    let mb_w = ((w + 15) / 16) * 16;
421    let mb_h = ((h + 15) / 16) * 16;
422
423    let y_stride = mb_w;
424    let uv_stride = mb_w / 2;
425
426    let mut y_plane = vec![0u8; y_stride * mb_h];
427    let mut u_plane = vec![128u8; uv_stride * (mb_h / 2)];
428    let mut v_plane = vec![128u8; uv_stride * (mb_h / 2)];
429
430    // Convert pixel by pixel
431    for row in 0..h {
432        for col in 0..w {
433            let idx = (row * w + col) * 3;
434            let r = f64::from(data[idx]);
435            let g = f64::from(data[idx + 1]);
436            let b = f64::from(data[idx + 2]);
437
438            let y_val = 0.299 * r + 0.587 * g + 0.114 * b;
439            y_plane[row * y_stride + col] = y_val.clamp(0.0, 255.0) as u8;
440        }
441    }
442
443    // Chroma subsampling: average 2x2 blocks
444    let ch_w = (w + 1) / 2;
445    let ch_h = (h + 1) / 2;
446
447    for row in 0..ch_h {
448        for col in 0..ch_w {
449            let mut sum_u = 0.0f64;
450            let mut sum_v = 0.0f64;
451            let mut count = 0.0f64;
452
453            for dy in 0..2 {
454                for dx in 0..2 {
455                    let sy = row * 2 + dy;
456                    let sx = col * 2 + dx;
457                    if sy < h && sx < w {
458                        let idx = (sy * w + sx) * 3;
459                        let r = f64::from(data[idx]);
460                        let g = f64::from(data[idx + 1]);
461                        let b = f64::from(data[idx + 2]);
462
463                        sum_u += -0.169 * r - 0.331 * g + 0.500 * b + 128.0;
464                        sum_v += 0.500 * r - 0.419 * g - 0.081 * b + 128.0;
465                        count += 1.0;
466                    }
467                }
468            }
469
470            let u_val = (sum_u / count).clamp(0.0, 255.0) as u8;
471            let v_val = (sum_v / count).clamp(0.0, 255.0) as u8;
472
473            u_plane[row * uv_stride + col] = u_val;
474            v_plane[row * uv_stride + col] = v_val;
475        }
476    }
477
478    // Pad remaining pixels by replicating edges
479    for row in 0..h {
480        for col in w..mb_w {
481            y_plane[row * y_stride + col] = y_plane[row * y_stride + w.saturating_sub(1)];
482        }
483    }
484    for row in h..mb_h {
485        let src_row = h.saturating_sub(1);
486        for col in 0..mb_w {
487            y_plane[row * y_stride + col] = y_plane[src_row * y_stride + col.min(mb_w - 1)];
488        }
489    }
490    for row in 0..ch_h {
491        for col in ch_w..(mb_w / 2) {
492            u_plane[row * uv_stride + col] = u_plane[row * uv_stride + ch_w.saturating_sub(1)];
493            v_plane[row * uv_stride + col] = v_plane[row * uv_stride + ch_w.saturating_sub(1)];
494        }
495    }
496    for row in ch_h..(mb_h / 2) {
497        let src_row = ch_h.saturating_sub(1);
498        for col in 0..(mb_w / 2) {
499            u_plane[row * uv_stride + col] = u_plane[src_row * uv_stride + col];
500            v_plane[row * uv_stride + col] = v_plane[src_row * uv_stride + col];
501        }
502    }
503
504    Ok(YuvPlanes {
505        y: y_plane,
506        u: u_plane,
507        v: v_plane,
508        y_stride,
509        uv_stride,
510        width,
511        height,
512    })
513}
514
515// ---------------------------------------------------------------------------
516// Token encoding helpers
517// ---------------------------------------------------------------------------
518
519/// VP8 token categories and their encoding.
520///
521/// DCT coefficients are entropy-coded as a sequence of "tokens":
522///   DCT_0  = 0 (run of zero)
523///   DCT_1  = +/-1
524///   DCT_2  = +/-2
525///   DCT_3  = +/-3
526///   DCT_4  = +/-4
527///   DCT_CAT1 = 5..6
528///   DCT_CAT2 = 7..10
529///   DCT_CAT3 = 11..18
530///   DCT_CAT4 = 19..34
531///   DCT_CAT5 = 35..66
532///   DCT_CAT6 = 67..2047
533///   DCT_EOB  = end of block
534///
535/// Each token is encoded as a binary tree walk using the 11 probability
536/// slots in `DEFAULT_COEFF_PROBS[type][band][ctx]`.
537///
538/// Extra-bits probabilities for each DCT category.
539static CAT1_PROB: [u8; 1] = [159];
540static CAT2_PROB: [u8; 2] = [165, 145];
541static CAT3_PROB: [u8; 3] = [173, 148, 140];
542static CAT4_PROB: [u8; 4] = [176, 155, 140, 135];
543static CAT5_PROB: [u8; 5] = [180, 157, 141, 134, 130];
544static CAT6_PROB: [u8; 11] = [254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129];
545
546/// Encodes a single DCT coefficient token into the boolean encoder.
547///
548/// Returns the new "previous coefficient context" (0 = zero, 1 = one, 2 = >1).
549fn encode_token(
550    enc: &mut BoolEncoder,
551    coeff: i32,
552    block_type: usize,
553    band: usize,
554    ctx: usize,
555    is_first_after_dc: bool,
556) -> usize {
557    let probs = &DEFAULT_COEFF_PROBS[block_type][band][ctx];
558    let abs_val = coeff.unsigned_abs();
559
560    if !is_first_after_dc {
561        // First decision: EOB vs non-EOB
562        // This is handled at a higher level (we don't emit EOB inside this fn)
563    }
564
565    // Token tree walk:
566    // Node 0: prob[0] => 0 = DCT_0 path, 1 = non-zero path
567    if abs_val == 0 {
568        enc.encode_bool(false, probs[0]); // DCT_0
569        return 0;
570    }
571
572    enc.encode_bool(true, probs[0]); // not DCT_0
573
574    // Node 1: prob[1] => 0 = DCT_1, 1 = higher
575    if abs_val == 1 {
576        enc.encode_bool(false, probs[1]);
577        // Sign bit
578        enc.encode_bit(coeff < 0);
579        return 1;
580    }
581
582    enc.encode_bool(true, probs[1]); // not DCT_1
583
584    // Node 2: prob[2] => 0 = DCT_2..DCT_4, 1 = categories
585    if abs_val <= 4 {
586        enc.encode_bool(false, probs[2]);
587        // Node 3: prob[3] => 0 = DCT_2, 1 = DCT_3 or DCT_4
588        if abs_val == 2 {
589            enc.encode_bool(false, probs[3]);
590        } else {
591            enc.encode_bool(true, probs[3]);
592            // Node 4: prob[4] => 0 = DCT_3, 1 = DCT_4
593            enc.encode_bool(abs_val == 4, probs[4]);
594        }
595        // Sign bit
596        enc.encode_bit(coeff < 0);
597        return 2;
598    }
599
600    enc.encode_bool(true, probs[2]); // category token
601
602    // Node 5: prob[5] => 0 = CAT1/CAT2, 1 = CAT3..CAT6
603    if abs_val <= 10 {
604        enc.encode_bool(false, probs[5]);
605        // Node 6: prob[6] => 0 = CAT1, 1 = CAT2
606        if abs_val <= 6 {
607            enc.encode_bool(false, probs[6]);
608            // CAT1: extra = abs_val - 5 (0 or 1)
609            let extra = abs_val - 5;
610            enc.encode_bool(extra != 0, CAT1_PROB[0]);
611        } else {
612            enc.encode_bool(true, probs[6]);
613            // CAT2: extra = abs_val - 7 (0..3)
614            let extra = abs_val - 7;
615            for (i, &p) in CAT2_PROB.iter().enumerate() {
616                let bit = (extra >> (CAT2_PROB.len() - 1 - i)) & 1 != 0;
617                enc.encode_bool(bit, p);
618            }
619        }
620    } else {
621        enc.encode_bool(true, probs[5]);
622        // Node 7: prob[7] => 0 = CAT3/CAT4, 1 = CAT5/CAT6
623        if abs_val <= 34 {
624            enc.encode_bool(false, probs[7]);
625            // Node 8: prob[8] => 0 = CAT3, 1 = CAT4
626            if abs_val <= 18 {
627                enc.encode_bool(false, probs[8]);
628                // CAT3: extra = abs_val - 11 (0..7)
629                let extra = abs_val - 11;
630                for (i, &p) in CAT3_PROB.iter().enumerate() {
631                    let bit = (extra >> (CAT3_PROB.len() - 1 - i)) & 1 != 0;
632                    enc.encode_bool(bit, p);
633                }
634            } else {
635                enc.encode_bool(true, probs[8]);
636                // CAT4: extra = abs_val - 19 (0..15)
637                let extra = abs_val - 19;
638                for (i, &p) in CAT4_PROB.iter().enumerate() {
639                    let bit = (extra >> (CAT4_PROB.len() - 1 - i)) & 1 != 0;
640                    enc.encode_bool(bit, p);
641                }
642            }
643        } else {
644            enc.encode_bool(true, probs[7]);
645            // Node 9: prob[9] => 0 = CAT5, 1 = CAT6
646            if abs_val <= 66 {
647                enc.encode_bool(false, probs[9]);
648                // CAT5: extra = abs_val - 35 (0..31)
649                let extra = abs_val - 35;
650                for (i, &p) in CAT5_PROB.iter().enumerate() {
651                    let bit = (extra >> (CAT5_PROB.len() - 1 - i)) & 1 != 0;
652                    enc.encode_bool(bit, p);
653                }
654            } else {
655                enc.encode_bool(true, probs[9]);
656                // CAT6: extra = abs_val - 67 (0..2047)
657                let extra = abs_val - 67;
658                for (i, &p) in CAT6_PROB.iter().enumerate() {
659                    let bit = (extra >> (CAT6_PROB.len() - 1 - i)) & 1 != 0;
660                    enc.encode_bool(bit, p);
661                }
662            }
663        }
664    }
665
666    // Sign bit
667    enc.encode_bit(coeff < 0);
668    2
669}
670
671/// Encodes a full 4x4 block of quantized DCT coefficients.
672///
673/// Emits tokens in zigzag order.  An EOB token is emitted after the
674/// last non-zero coefficient.
675///
676/// `first_coeff_idx` is 0 for Y2/UV blocks, 1 for Y blocks (where DC
677/// is carried by the Y2 block).
678fn encode_block(
679    enc: &mut BoolEncoder,
680    quantized: &[i32; 16],
681    block_type: usize,
682    first_coeff_idx: usize,
683) {
684    // Find last non-zero coefficient (in zigzag order)
685    let mut last_nonzero: Option<usize> = None;
686    for i in (first_coeff_idx..16).rev() {
687        let zigzag_pos = ZIGZAG_ORDER[i];
688        if quantized[zigzag_pos] != 0 {
689            last_nonzero = Some(i);
690            break;
691        }
692    }
693
694    let last_nz = match last_nonzero {
695        Some(idx) => idx,
696        None => {
697            // All zero — emit EOB
698            let band = COEFF_BANDS[first_coeff_idx];
699            let probs = &DEFAULT_COEFF_PROBS[block_type][band][0];
700            enc.encode_bool(false, probs[0]); // DCT_0 at first position acts as EOB marker
701            return;
702        }
703    };
704
705    let mut ctx: usize = 0; // previous coefficient context
706
707    for i in first_coeff_idx..=last_nz {
708        let zigzag_pos = ZIGZAG_ORDER[i];
709        let coeff = quantized[zigzag_pos];
710        let band = COEFF_BANDS[i];
711
712        ctx = encode_token(enc, coeff, block_type, band, ctx, i > first_coeff_idx);
713    }
714
715    // Emit EOB after last non-zero coefficient (if there are remaining positions)
716    if last_nz + 1 < 16 {
717        let eob_band = COEFF_BANDS[(last_nz + 1).min(15)];
718        let eob_probs = &DEFAULT_COEFF_PROBS[block_type][eob_band][ctx];
719        // EOB is encoded as: prob[0] decides "is coefficient zero?"
720        // In VP8, EOB is a separate token that terminates the block.
721        // It's signaled as the first branch being "false" when the
722        // coefficient *would have been* the next one — but we use
723        // a simplified approach: the decoder knows EOB means "rest are zero".
724        enc.encode_bool(false, eob_probs[0]);
725    }
726}
727
728// ---------------------------------------------------------------------------
729// Macroblock encoding
730// ---------------------------------------------------------------------------
731
/// Divides `coeff` by the quantizer `step`, rounding half away from zero.
///
/// A `step` of 0 disables quantization and returns `coeff` unchanged.
fn quantize(coeff: i32, step: i32) -> i32 {
    match step {
        0 => coeff,
        _ => {
            // Quantize the magnitude, then restore the sign.
            let magnitude = (coeff.abs() + step / 2) / step;
            if coeff < 0 {
                -magnitude
            } else {
                magnitude
            }
        }
    }
}
741
/// Quantized transform coefficients of one 16x16 macroblock.
///
/// Holds every coefficient block that `encode_macroblock` produces:
///   - 16 luma 4x4 blocks (Y)
///   - 1 DC block (Y2, the WHT of the luma DC values)
///   - 4 U chroma blocks
///   - 4 V chroma blocks
struct MacroblockCoeffs {
    /// The 16 luma sub-blocks, 16 raster-order coefficients each.
    y_blocks: [[i32; 16]; 16],
    /// The Y2 block: 16 coefficients from the WHT of the luma DC values.
    y2_block: [i32; 16],
    /// The 4 U chroma sub-blocks, 16 coefficients each.
    u_blocks: [[i32; 16]; 4],
    /// The 4 V chroma sub-blocks, 16 coefficients each.
    v_blocks: [[i32; 16]; 4],
}
759
760/// Encodes a single 16x16 macroblock to produce quantized coefficients.
761fn encode_macroblock(
762    yuv: &YuvPlanes,
763    mb_x: usize,
764    mb_y: usize,
765    dc_quant: i32,
766    ac_quant: i32,
767    y2_dc_quant: i32,
768    y2_ac_quant: i32,
769    uv_dc_quant: i32,
770    uv_ac_quant: i32,
771    reconstructed_y: &[u8],
772    recon_y_stride: usize,
773    reconstructed_u: &[u8],
774    recon_uv_stride: usize,
775    reconstructed_v: &[u8],
776) -> MacroblockCoeffs {
777    let mut mb = MacroblockCoeffs {
778        y_blocks: [[0i32; 16]; 16],
779        y2_block: [0i32; 16],
780        u_blocks: [[0i32; 16]; 4],
781        v_blocks: [[0i32; 16]; 4],
782    };
783
784    // --- DC prediction for luma 16x16 ---
785    let pred_y = compute_dc_pred_16x16(reconstructed_y, recon_y_stride, mb_x, mb_y);
786
787    // Process 16 luma 4x4 sub-blocks
788    let mut dc_values = [0i32; 16];
789
790    for sb in 0..16 {
791        let sb_row = sb / 4;
792        let sb_col = sb % 4;
793
794        let mut residual = [0i32; 16];
795        for r in 0..4 {
796            for c in 0..4 {
797                let py = mb_y * 16 + sb_row * 4 + r;
798                let px = mb_x * 16 + sb_col * 4 + c;
799                let orig = i32::from(yuv.y[py * yuv.y_stride + px]);
800                let pred = i32::from(pred_y);
801                residual[r * 4 + c] = orig - pred;
802            }
803        }
804
805        let mut coeffs = [0i32; 16];
806        fdct4x4(&residual, &mut coeffs);
807
808        // Save DC for Y2 block
809        dc_values[sb] = coeffs[0];
810
811        // Quantize AC coefficients (DC will be replaced by Y2)
812        for i in 1..16 {
813            coeffs[i] = quantize(coeffs[i], ac_quant);
814        }
815        // DC is set to 0 here; it goes through Y2
816        coeffs[0] = 0;
817
818        mb.y_blocks[sb] = coeffs;
819    }
820
821    // Y2 block: WHT of DC values
822    let mut y2_coeffs = [0i32; 16];
823    fwht4x4(&dc_values, &mut y2_coeffs);
824
825    // Quantize Y2
826    mb.y2_block[0] = quantize(y2_coeffs[0], y2_dc_quant);
827    for i in 1..16 {
828        mb.y2_block[i] = quantize(y2_coeffs[i], y2_ac_quant);
829    }
830
831    // --- Chroma ---
832    let pred_u = compute_dc_pred_8x8(reconstructed_u, recon_uv_stride, mb_x, mb_y);
833    let pred_v = compute_dc_pred_8x8(reconstructed_v, recon_uv_stride, mb_x, mb_y);
834
835    for sb in 0..4 {
836        let sb_row = sb / 2;
837        let sb_col = sb % 2;
838
839        // U block
840        let mut u_residual = [0i32; 16];
841        for r in 0..4 {
842            for c in 0..4 {
843                let py = mb_y * 8 + sb_row * 4 + r;
844                let px = mb_x * 8 + sb_col * 4 + c;
845                let orig = i32::from(yuv.u[py * yuv.uv_stride + px]);
846                u_residual[r * 4 + c] = orig - i32::from(pred_u);
847            }
848        }
849        let mut u_coeffs = [0i32; 16];
850        fdct4x4(&u_residual, &mut u_coeffs);
851        u_coeffs[0] = quantize(u_coeffs[0], uv_dc_quant);
852        for i in 1..16 {
853            u_coeffs[i] = quantize(u_coeffs[i], uv_ac_quant);
854        }
855        mb.u_blocks[sb] = u_coeffs;
856
857        // V block
858        let mut v_residual = [0i32; 16];
859        for r in 0..4 {
860            for c in 0..4 {
861                let py = mb_y * 8 + sb_row * 4 + r;
862                let px = mb_x * 8 + sb_col * 4 + c;
863                let orig = i32::from(yuv.v[py * yuv.uv_stride + px]);
864                v_residual[r * 4 + c] = orig - i32::from(pred_v);
865            }
866        }
867        let mut v_coeffs = [0i32; 16];
868        fdct4x4(&v_residual, &mut v_coeffs);
869        v_coeffs[0] = quantize(v_coeffs[0], uv_dc_quant);
870        for i in 1..16 {
871            v_coeffs[i] = quantize(v_coeffs[i], uv_ac_quant);
872        }
873        mb.v_blocks[sb] = v_coeffs;
874    }
875
876    mb
877}
878
/// Computes DC prediction value for a 16x16 luma block.
///
/// Averages (with rounding) the reconstructed row directly above the
/// macroblock when `mb_y > 0` and the column directly to its left when
/// `mb_x > 0`. Positions that fall outside `recon` are ignored, and 128 is
/// returned when no neighbouring sample contributes.
fn compute_dc_pred_16x16(recon: &[u8], stride: usize, mb_x: usize, mb_y: usize) -> u8 {
    let mut total: u32 = 0;
    let mut samples: u32 = 0;

    // Adds the sample at `idx` to the running sum if it lies inside `recon`.
    let mut accumulate = |idx: usize| {
        if let Some(&px) = recon.get(idx) {
            total += u32::from(px);
            samples += 1;
        }
    };

    // Row above the macroblock
    if mb_y > 0 {
        let start = (mb_y * 16 - 1) * stride + mb_x * 16;
        (0..16).map(|col| start + col).for_each(&mut accumulate);
    }

    // Column left of the macroblock
    if mb_x > 0 {
        let x = mb_x * 16 - 1;
        (0..16)
            .map(|row| (mb_y * 16 + row) * stride + x)
            .for_each(&mut accumulate);
    }

    match samples {
        0 => 128,
        n => ((total + n / 2) / n) as u8,
    }
}
911
/// Computes DC prediction value for an 8x8 chroma block.
///
/// Averages (with rounding) the eight reconstructed samples above the block
/// when `mb_y > 0` and the eight samples to its left when `mb_x > 0`.
/// Indices beyond the end of `recon` are skipped; with no contributing
/// samples the result is 128.
fn compute_dc_pred_8x8(recon: &[u8], stride: usize, mb_x: usize, mb_y: usize) -> u8 {
    // The `filter` runs before `map`, so the subtractions are never evaluated
    // for macroblocks on the top or left edge.
    let above = (0..8usize)
        .filter(|_| mb_y > 0)
        .map(|col| (mb_y * 8 - 1) * stride + mb_x * 8 + col);
    let beside = (0..8usize)
        .filter(|_| mb_x > 0)
        .map(|row| (mb_y * 8 + row) * stride + (mb_x * 8 - 1));

    let (sum, count) = above
        .chain(beside)
        .filter_map(|idx| recon.get(idx))
        .fold((0u32, 0u32), |(s, n), &px| (s + u32::from(px), n + 1));

    (sum + count / 2)
        .checked_div(count)
        .map_or(128, |avg| avg as u8)
}
940
941/// Reconstructs a macroblock from its quantized coefficients for use as
942/// reference in subsequent macroblock predictions.
943fn reconstruct_macroblock(
944    mb: &MacroblockCoeffs,
945    dc_quant: i32,
946    ac_quant: i32,
947    y2_dc_quant: i32,
948    y2_ac_quant: i32,
949    uv_dc_quant: i32,
950    uv_ac_quant: i32,
951    pred_y: u8,
952    pred_u: u8,
953    pred_v: u8,
954    recon_y: &mut [u8],
955    recon_y_stride: usize,
956    mb_x: usize,
957    mb_y: usize,
958    recon_u: &mut [u8],
959    recon_uv_stride: usize,
960    recon_v: &mut [u8],
961) {
962    // Inverse Y2 (WHT) to get dequantized DC values
963    let mut y2_dequant = [0i32; 16];
964    y2_dequant[0] = mb.y2_block[0] * y2_dc_quant;
965    for i in 1..16 {
966        y2_dequant[i] = mb.y2_block[i] * y2_ac_quant;
967    }
968
969    // Inverse WHT
970    let mut dc_values = [0i32; 16];
971    {
972        let mut temp = [0i32; 16];
973        // Row inverse WHT
974        for row in 0..4 {
975            let b = row * 4;
976            let a = y2_dequant[b] + y2_dequant[b + 2];
977            let bv = y2_dequant[b + 1] + y2_dequant[b + 3];
978            let c = y2_dequant[b + 1] - y2_dequant[b + 3];
979            let d = y2_dequant[b] - y2_dequant[b + 2];
980
981            temp[b] = a + bv;
982            temp[b + 1] = d + c;
983            temp[b + 2] = a - bv;
984            temp[b + 3] = d - c;
985        }
986        // Column inverse WHT
987        for col in 0..4 {
988            let a = temp[col] + temp[col + 8];
989            let bv = temp[col + 4] + temp[col + 12];
990            let c = temp[col + 4] - temp[col + 12];
991            let d = temp[col] - temp[col + 8];
992
993            dc_values[col] = (a + bv + 1) >> 1;
994            dc_values[col + 4] = (d + c + 1) >> 1;
995            dc_values[col + 8] = (a - bv + 1) >> 1;
996            dc_values[col + 12] = (d - c + 1) >> 1;
997        }
998    }
999
1000    // Reconstruct each luma 4x4 sub-block
1001    for sb in 0..16 {
1002        let sb_row = sb / 4;
1003        let sb_col = sb % 4;
1004
1005        // Dequantize AC
1006        let mut dequant = [0i32; 16];
1007        dequant[0] = dc_values[sb]; // DC from Y2
1008        for i in 1..16 {
1009            dequant[i] = mb.y_blocks[sb][i] * ac_quant;
1010        }
1011
1012        // Inverse DCT
1013        let reconstructed = idct4x4_simple(&dequant);
1014
1015        // Add prediction and clamp
1016        for r in 0..4 {
1017            for c in 0..4 {
1018                let py = mb_y * 16 + sb_row * 4 + r;
1019                let px = mb_x * 16 + sb_col * 4 + c;
1020                let val = reconstructed[r * 4 + c] + i32::from(pred_y);
1021                recon_y[py * recon_y_stride + px] = val.clamp(0, 255) as u8;
1022            }
1023        }
1024    }
1025
1026    // Reconstruct chroma
1027    for sb in 0..4 {
1028        let sb_row = sb / 2;
1029        let sb_col = sb % 2;
1030
1031        // U
1032        let mut u_dequant = [0i32; 16];
1033        u_dequant[0] = mb.u_blocks[sb][0] * uv_dc_quant;
1034        for i in 1..16 {
1035            u_dequant[i] = mb.u_blocks[sb][i] * uv_ac_quant;
1036        }
1037        let u_recon = idct4x4_simple(&u_dequant);
1038        for r in 0..4 {
1039            for c in 0..4 {
1040                let py = mb_y * 8 + sb_row * 4 + r;
1041                let px = mb_x * 8 + sb_col * 4 + c;
1042                let val = u_recon[r * 4 + c] + i32::from(pred_u);
1043                recon_u[py * recon_uv_stride + px] = val.clamp(0, 255) as u8;
1044            }
1045        }
1046
1047        // V
1048        let mut v_dequant = [0i32; 16];
1049        v_dequant[0] = mb.v_blocks[sb][0] * uv_dc_quant;
1050        for i in 1..16 {
1051            v_dequant[i] = mb.v_blocks[sb][i] * uv_ac_quant;
1052        }
1053        let v_recon = idct4x4_simple(&v_dequant);
1054        for r in 0..4 {
1055            for c in 0..4 {
1056                let py = mb_y * 8 + sb_row * 4 + r;
1057                let px = mb_x * 8 + sb_col * 4 + c;
1058                let val = v_recon[r * 4 + c] + i32::from(pred_v);
1059                recon_v[py * recon_uv_stride + px] = val.clamp(0, 255) as u8;
1060            }
1061        }
1062    }
1063}
1064
/// Simplified inverse 4x4 DCT for reconstruction.
///
/// Takes dequantized coefficients in raster order and returns
/// residual pixel values. A row pass is followed by a column pass; the
/// column pass applies the final `(x + 4) >> 3` normalisation.
///
/// The odd part uses the 16.16 fixed-point constants 35468
/// (`sqrt(2) * sin(pi / 8)`) and 85627 (`sqrt(2) * cos(pi / 8)`) with the
/// sign pattern of the VP8 inverse DCT in RFC 6386:
///
/// - outer outputs: `c1 * cos + c3 * sin`
/// - inner outputs: `c1 * sin - c3 * cos`
///
/// The previous version had the sign of the `c3` term flipped in both
/// rotations, which inverted the contribution of the highest-frequency
/// coefficient. Products are formed in `i64` so large coefficients cannot
/// overflow.
///
/// NOTE(review): the rotations round (`+ 32768`) where the RFC reference
/// truncates, so the result may differ from a decoder by one LSB.
fn idct4x4_simple(coeffs: &[i32; 16]) -> [i32; 16] {
    const SIN_PI8_SQRT2: i64 = 35468;
    const COS_PI8_SQRT2: i64 = 85627;

    /// One-dimensional inverse butterfly without the final normalisation.
    fn inverse_1d(c0: i32, c1: i32, c2: i32, c3: i32) -> [i32; 4] {
        let even_sum = c0 + c2;
        let even_diff = c0 - c2;

        let (c1, c3) = (i64::from(c1), i64::from(c3));
        let inner = ((c1 * SIN_PI8_SQRT2 - c3 * COS_PI8_SQRT2 + 32768) >> 16) as i32;
        let outer = ((c1 * COS_PI8_SQRT2 + c3 * SIN_PI8_SQRT2 + 32768) >> 16) as i32;

        [
            even_sum + outer,
            even_diff + inner,
            even_diff - inner,
            even_sum - outer,
        ]
    }

    // Row pass
    let mut temp = [0i32; 16];
    for row in 0..4 {
        let b = row * 4;
        let line = inverse_1d(coeffs[b], coeffs[b + 1], coeffs[b + 2], coeffs[b + 3]);
        temp[b..b + 4].copy_from_slice(&line);
    }

    // Column pass with final rounding shift
    let mut output = [0i32; 16];
    for col in 0..4 {
        let line = inverse_1d(temp[col], temp[col + 4], temp[col + 8], temp[col + 12]);
        for (row, value) in line.into_iter().enumerate() {
            output[col + row * 4] = (value + 4) >> 3;
        }
    }

    output
}
1114
1115// ---------------------------------------------------------------------------
1116// VP8 bitstream assembly
1117// ---------------------------------------------------------------------------
1118
1119/// Writes the VP8 frame header using the boolean encoder.
1120///
1121/// This encodes Partition 1: the frame header flags, quantizer,
1122/// and macroblock prediction modes.
1123fn write_frame_header(enc: &mut BoolEncoder, mb_width: u32, mb_height: u32, quant_index: u8) {
1124    // Color space (0 = YUV)
1125    enc.encode_bit(false);
1126
1127    // Clamping type (0 = required)
1128    enc.encode_bit(false);
1129
1130    // Segmentation: disabled
1131    enc.encode_bit(false);
1132
1133    // Loop filter parameters
1134    // filter_type (0 = normal)
1135    enc.encode_bit(false);
1136    // loop_filter_level (6 bits) - use 0 for simplicity
1137    enc.encode_literal(0, 6);
1138    // sharpness_level (3 bits)
1139    enc.encode_literal(0, 3);
1140
1141    // Mode ref LF delta: disabled
1142    enc.encode_bit(false);
1143
1144    // Number of DCT partitions: log2(1) = 0 (2 bits)
1145    enc.encode_literal(0, 2);
1146
1147    // Quantizer (7 bits for base index)
1148    enc.encode_literal(u32::from(quant_index), 7);
1149
1150    // Y DC delta (1 bit flag + optional value) - no delta
1151    enc.encode_bit(false);
1152    // Y2 DC delta
1153    enc.encode_bit(false);
1154    // Y2 AC delta
1155    enc.encode_bit(false);
1156    // UV DC delta
1157    enc.encode_bit(false);
1158    // UV AC delta
1159    enc.encode_bit(false);
1160
1161    // Token probability updates: signal "no update" for all
1162    // 4 * 8 * 3 * 11 = 1056 probabilities
1163    for _block_type in 0..4 {
1164        for _band in 0..8 {
1165            for _ctx in 0..3 {
1166                for _node in 0..11 {
1167                    enc.encode_bit(false); // no update
1168                }
1169            }
1170        }
1171    }
1172
1173    // Skip coefficient (mb_no_coeff_skip)
1174    enc.encode_bit(false); // disabled
1175
1176    // Macroblock prediction modes
1177    // All macroblocks use I16 DC prediction
1178    let total_mbs = mb_width * mb_height;
1179    for _ in 0..total_mbs {
1180        // I16 mode tree:
1181        // prob 145: 0 = DC, 1 = other
1182        enc.encode_bool(false, 145); // DC prediction
1183
1184        // Chroma mode tree:
1185        // prob 142: 0 = DC, 1 = other
1186        enc.encode_bool(false, 142); // DC prediction
1187    }
1188}
1189
1190// ---------------------------------------------------------------------------
1191// Public encoder API
1192// ---------------------------------------------------------------------------
1193
/// VP8 lossy encoder for WebP.
///
/// Produces VP8 keyframe bitstreams that can be embedded in a WebP
/// RIFF container.  The encoder generates intra-only (keyframe) frames
/// using DC prediction and a configurable quality parameter.
///
/// The type is cheap to clone and implements `Debug` so it can be embedded
/// in larger configuration structs and logged.
///
/// # Examples
///
/// ```
/// use oximedia_codec::webp::encoder::WebPLossyEncoder;
///
/// let encoder = WebPLossyEncoder::new(75);
///
/// // 2x2 red image
/// let rgb = [255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0];
/// let vp8_data = encoder.encode_rgb(&rgb, 2, 2).expect("encode");
///
/// // Encoding yields a non-empty VP8 bitstream
/// assert!(!vp8_data.is_empty());
/// ```
#[derive(Debug, Clone)]
pub struct WebPLossyEncoder {
    /// Quality setting; `WebPLossyEncoder::new` clamps it to `0..=100`.
    quality: u8,
}
1217
1218impl WebPLossyEncoder {
1219    /// Creates a new lossy encoder with the given quality (0-100).
1220    ///
1221    /// - 0 = lowest quality / smallest size
1222    /// - 100 = highest quality / largest size
1223    #[must_use]
1224    pub fn new(quality: u8) -> Self {
1225        Self {
1226            quality: quality.min(100),
1227        }
1228    }
1229
1230    /// Maps quality (0-100) to VP8 quantizer index (0-127).
1231    fn quality_to_qindex(&self) -> u8 {
1232        // Linear mapping: quality 100 → qindex 0, quality 0 → qindex 127
1233        let qindex = 127 - (u32::from(self.quality) * 127 / 100);
1234        (qindex as u8).min(127)
1235    }
1236
1237    /// Encodes RGB data to a VP8 bitstream (without RIFF container).
1238    ///
1239    /// The input `data` must contain `width * height * 3` bytes in
1240    /// row-major R, G, B order (8 bits per component).
1241    ///
1242    /// Returns the raw VP8 bitstream bytes suitable for wrapping in a
1243    /// WebP RIFF container.
1244    ///
1245    /// # Errors
1246    ///
1247    /// Returns `CodecError::InvalidParameter` if dimensions are zero or
1248    /// the data length does not match `width * height * 3`.
1249    pub fn encode_rgb(&self, data: &[u8], width: u32, height: u32) -> CodecResult<Vec<u8>> {
1250        self.validate_dimensions(width, height)?;
1251
1252        let expected_len = (width as usize) * (height as usize) * 3;
1253        if data.len() < expected_len {
1254            return Err(CodecError::InvalidParameter(format!(
1255                "RGB data too short: expected {expected_len}, got {}",
1256                data.len()
1257            )));
1258        }
1259
1260        let yuv = rgb_to_yuv420(data, width, height)?;
1261        self.encode_yuv(&yuv)
1262    }
1263
1264    /// Encodes RGBA data to VP8 bitstream + separate alpha channel.
1265    ///
1266    /// Returns `(vp8_data, alpha_data)` where `alpha_data` contains
1267    /// the raw alpha plane bytes (width * height, row-major, uncompressed).
1268    ///
1269    /// # Errors
1270    ///
1271    /// Returns `CodecError::InvalidParameter` if dimensions are zero or
1272    /// the data length does not match `width * height * 4`.
1273    pub fn encode_rgba(
1274        &self,
1275        data: &[u8],
1276        width: u32,
1277        height: u32,
1278    ) -> CodecResult<(Vec<u8>, Vec<u8>)> {
1279        self.validate_dimensions(width, height)?;
1280
1281        let w = width as usize;
1282        let h = height as usize;
1283        let expected_len = w * h * 4;
1284        if data.len() < expected_len {
1285            return Err(CodecError::InvalidParameter(format!(
1286                "RGBA data too short: expected {expected_len}, got {}",
1287                data.len()
1288            )));
1289        }
1290
1291        // Extract RGB and alpha
1292        let pixel_count = w * h;
1293        let mut rgb = Vec::with_capacity(pixel_count * 3);
1294        let mut alpha = Vec::with_capacity(pixel_count);
1295
1296        for i in 0..pixel_count {
1297            let base = i * 4;
1298            rgb.push(data[base]);
1299            rgb.push(data[base + 1]);
1300            rgb.push(data[base + 2]);
1301            alpha.push(data[base + 3]);
1302        }
1303
1304        let vp8_data = self.encode_rgb(&rgb, width, height)?;
1305        Ok((vp8_data, alpha))
1306    }
1307
1308    /// Validates that width and height are non-zero and within VP8 limits.
1309    fn validate_dimensions(&self, width: u32, height: u32) -> CodecResult<()> {
1310        if width == 0 || height == 0 {
1311            return Err(CodecError::InvalidParameter(
1312                "Width and height must be non-zero".to_string(),
1313            ));
1314        }
1315        // VP8 maximum dimension is 16383
1316        if width > 16383 || height > 16383 {
1317            return Err(CodecError::InvalidParameter(format!(
1318                "Dimensions {}x{} exceed VP8 maximum of 16383",
1319                width, height
1320            )));
1321        }
1322        Ok(())
1323    }
1324
1325    /// Core encoding: takes YUV planes and produces a VP8 bitstream.
1326    fn encode_yuv(&self, yuv: &YuvPlanes) -> CodecResult<Vec<u8>> {
1327        let width = yuv.width;
1328        let height = yuv.height;
1329        let mb_width = ((width + 15) / 16) as usize;
1330        let mb_height = ((height + 15) / 16) as usize;
1331
1332        let qindex = self.quality_to_qindex();
1333        let qi = qindex as usize;
1334        let dc_quant = DC_QUANT_TABLE[qi.min(127)];
1335        let ac_quant = AC_QUANT_TABLE[qi.min(127)];
1336        let y2_dc_quant = DC_QUANT_TABLE[qi.min(127)] * 2;
1337        let y2_ac_quant = AC_QUANT_TABLE[qi.min(127)].max(8) * 155 / 100;
1338        let uv_dc_quant = DC_QUANT_TABLE[qi.min(127)];
1339        let uv_ac_quant = AC_QUANT_TABLE[qi.min(127)];
1340
1341        // Reconstructed planes for prediction reference
1342        let recon_y_stride = mb_width * 16;
1343        let recon_uv_stride = mb_width * 8;
1344        let mut recon_y = vec![128u8; recon_y_stride * mb_height * 16];
1345        let mut recon_u = vec![128u8; recon_uv_stride * mb_height * 8];
1346        let mut recon_v = vec![128u8; recon_uv_stride * mb_height * 8];
1347
1348        // Encode frame header (Partition 1)
1349        let mut header_enc = BoolEncoder::new();
1350        write_frame_header(&mut header_enc, mb_width as u32, mb_height as u32, qindex);
1351
1352        // Encode DCT tokens (Partition 2)
1353        let mut token_enc = BoolEncoder::new();
1354
1355        for mby in 0..mb_height {
1356            for mbx in 0..mb_width {
1357                let mb = encode_macroblock(
1358                    yuv,
1359                    mbx,
1360                    mby,
1361                    dc_quant,
1362                    ac_quant,
1363                    y2_dc_quant,
1364                    y2_ac_quant,
1365                    uv_dc_quant,
1366                    uv_ac_quant,
1367                    &recon_y,
1368                    recon_y_stride,
1369                    &recon_u,
1370                    recon_uv_stride,
1371                    &recon_v,
1372                );
1373
1374                // Encode Y2 block (block_type = 3)
1375                encode_block(&mut token_enc, &mb.y2_block, 3, 0);
1376
1377                // Encode 16 Y blocks (block_type = 0 for DC-after-Y2, skip DC)
1378                for sb in 0..16 {
1379                    encode_block(&mut token_enc, &mb.y_blocks[sb], 0, 1);
1380                }
1381
1382                // Encode 4 U blocks (block_type = 2)
1383                for sb in 0..4 {
1384                    encode_block(&mut token_enc, &mb.u_blocks[sb], 2, 0);
1385                }
1386
1387                // Encode 4 V blocks (block_type = 2)
1388                for sb in 0..4 {
1389                    encode_block(&mut token_enc, &mb.v_blocks[sb], 2, 0);
1390                }
1391
1392                // Reconstruct macroblock for prediction reference
1393                let pred_y = compute_dc_pred_16x16(&recon_y, recon_y_stride, mbx, mby);
1394                let pred_u = compute_dc_pred_8x8(&recon_u, recon_uv_stride, mbx, mby);
1395                let pred_v = compute_dc_pred_8x8(&recon_v, recon_uv_stride, mbx, mby);
1396
1397                reconstruct_macroblock(
1398                    &mb,
1399                    dc_quant,
1400                    ac_quant,
1401                    y2_dc_quant,
1402                    y2_ac_quant,
1403                    uv_dc_quant,
1404                    uv_ac_quant,
1405                    pred_y,
1406                    pred_u,
1407                    pred_v,
1408                    &mut recon_y,
1409                    recon_y_stride,
1410                    mbx,
1411                    mby,
1412                    &mut recon_u,
1413                    recon_uv_stride,
1414                    &mut recon_v,
1415                );
1416            }
1417        }
1418
1419        let header_data = header_enc.flush();
1420        let token_data = token_enc.flush();
1421
1422        // Assemble VP8 bitstream
1423        self.assemble_bitstream(width, height, &header_data, &token_data)
1424    }
1425
1426    /// Assembles the final VP8 bitstream from header and token partitions.
1427    fn assemble_bitstream(
1428        &self,
1429        width: u32,
1430        height: u32,
1431        header_data: &[u8],
1432        token_data: &[u8],
1433    ) -> CodecResult<Vec<u8>> {
1434        let first_partition_size = header_data.len() as u32;
1435
1436        // Total output size: frame_tag(3) + sync(3) + dims(4) + partitions
1437        let total_size = 3 + 3 + 4 + header_data.len() + token_data.len();
1438        let mut output = Vec::with_capacity(total_size);
1439
1440        // --- Frame tag (3 bytes) ---
1441        // bit 0: frame_type (0 = keyframe)
1442        // bits 1-3: version (0)
1443        // bit 4: show_frame (1)
1444        // bits 5-7 of byte 0 + bytes 1-2: first_partition_size (19 bits)
1445        let b0: u8 = 0x00  // frame_type = 0 (key)
1446            | 0x00          // version = 0
1447            | 0x10          // show_frame = 1
1448            | ((first_partition_size << 5) as u8 & 0xE0);
1449        let b1: u8 = (first_partition_size >> 3) as u8;
1450        let b2: u8 = (first_partition_size >> 11) as u8;
1451
1452        output.push(b0);
1453        output.push(b1);
1454        output.push(b2);
1455
1456        // --- Sync code ---
1457        output.push(0x9D);
1458        output.push(0x01);
1459        output.push(0x2A);
1460
1461        // --- Dimensions (4 bytes, LE) ---
1462        // width: bits 0-13, horizontal_scale: bits 14-15
1463        let w_le = (width & 0x3FFF) as u16;
1464        output.push(w_le as u8);
1465        output.push((w_le >> 8) as u8);
1466
1467        let h_le = (height & 0x3FFF) as u16;
1468        output.push(h_le as u8);
1469        output.push((h_le >> 8) as u8);
1470
1471        // --- Partition 1 (header) ---
1472        output.extend_from_slice(header_data);
1473
1474        // --- Partition 2 (tokens) ---
1475        output.extend_from_slice(token_data);
1476
1477        Ok(output)
1478    }
1479}
1480
1481// ---------------------------------------------------------------------------
1482// Tests
1483// ---------------------------------------------------------------------------
1484
1485#[cfg(test)]
1486mod tests {
1487    use super::*;
1488
1489    #[test]
1490    fn test_bool_encoder_basic() {
1491        let mut enc = BoolEncoder::new();
1492        enc.encode_bit(false);
1493        enc.encode_bit(true);
1494        enc.encode_bit(false);
1495        let data = enc.flush();
1496        assert!(!data.is_empty());
1497    }
1498
1499    #[test]
1500    fn test_bool_encoder_literal() {
1501        let mut enc = BoolEncoder::new();
1502        enc.encode_literal(42, 8);
1503        let data = enc.flush();
1504        assert!(!data.is_empty());
1505    }
1506
1507    #[test]
1508    fn test_bool_encoder_with_prob() {
1509        let mut enc = BoolEncoder::new();
1510        // Encode several symbols with different probabilities
1511        for prob in [1, 50, 128, 200, 255] {
1512            enc.encode_bool(true, prob);
1513            enc.encode_bool(false, prob);
1514        }
1515        let data = enc.flush();
1516        assert!(!data.is_empty());
1517    }
1518
1519    #[test]
1520    fn test_fdct4_1d() {
1521        let input = [100, 100, 100, 100]; // DC-only signal
1522        let mut output = [0i32; 4];
1523        fdct4_1d(&input, &mut output);
1524
1525        // For a flat signal, DC should be large and dominant
1526        assert!(output[0] > 0);
1527        // AC may have small rounding artifacts from integer approximation
1528        assert!(output[0].abs() > output[1].abs());
1529        assert!(output[0].abs() > output[2].abs());
1530        assert!(output[0].abs() > output[3].abs());
1531    }
1532
1533    #[test]
1534    fn test_fdct4x4_dc_only() {
1535        let residual = [10i32; 16]; // Flat residual
1536        let mut coeffs = [0i32; 16];
1537        fdct4x4(&residual, &mut coeffs);
1538
1539        // DC coefficient should be dominant
1540        assert!(coeffs[0].abs() > 0);
1541        // AC should be much smaller than DC for flat input
1542        let dc_abs = coeffs[0].abs();
1543        for i in 1..16 {
1544            assert!(
1545                coeffs[i].abs() < dc_abs / 2,
1546                "AC coeff[{i}] = {} should be much smaller than DC = {}",
1547                coeffs[i],
1548                coeffs[0]
1549            );
1550        }
1551    }
1552
1553    #[test]
1554    fn test_fwht4x4_dc_only() {
1555        let dc_values = [100i32; 16]; // All same DC
1556        let mut coeffs = [0i32; 16];
1557        fwht4x4(&dc_values, &mut coeffs);
1558
1559        // DC should be 16 * 100 = 1600
1560        assert_eq!(coeffs[0], 1600);
1561        // AC should be 0
1562        for i in 1..16 {
1563            assert_eq!(coeffs[i], 0);
1564        }
1565    }
1566
1567    #[test]
1568    fn test_quantize() {
1569        assert_eq!(quantize(100, 10), 10);
1570        assert_eq!(quantize(-100, 10), -10);
1571        assert_eq!(quantize(0, 10), 0);
1572        assert_eq!(quantize(4, 10), 0); // Below threshold
1573        assert_eq!(quantize(15, 10), 2); // (15+5)/10 = 2
1574    }
1575
1576    #[test]
1577    fn test_quality_to_qindex() {
1578        let enc_low = WebPLossyEncoder::new(0);
1579        let enc_mid = WebPLossyEncoder::new(50);
1580        let enc_high = WebPLossyEncoder::new(100);
1581
1582        assert_eq!(enc_high.quality_to_qindex(), 0);
1583        assert_eq!(enc_low.quality_to_qindex(), 127);
1584        assert!(enc_mid.quality_to_qindex() > 0);
1585        assert!(enc_mid.quality_to_qindex() < 127);
1586    }
1587
1588    #[test]
1589    fn test_rgb_to_yuv420_basic() {
1590        // 4x4 white image
1591        let data = vec![255u8; 4 * 4 * 3];
1592        let yuv = rgb_to_yuv420(&data, 4, 4).expect("conversion should succeed");
1593
1594        // White (255,255,255) → Y ≈ 255
1595        for &y in &yuv.y[..16] {
1596            assert!(y >= 250, "Y should be near 255 for white, got {y}");
1597        }
1598    }
1599
1600    #[test]
1601    fn test_rgb_to_yuv420_black() {
1602        // 4x4 black image
1603        let data = vec![0u8; 4 * 4 * 3];
1604        let yuv = rgb_to_yuv420(&data, 4, 4).expect("conversion should succeed");
1605
1606        // Black (0,0,0) → Y = 0, U = 128, V = 128
1607        for &y in &yuv.y[..16] {
1608            assert!(y <= 5, "Y should be near 0 for black, got {y}");
1609        }
1610        for &u in &yuv.u[..4] {
1611            assert!(
1612                (120..=136).contains(&u),
1613                "U should be near 128 for black, got {u}"
1614            );
1615        }
1616    }
1617
1618    #[test]
1619    fn test_rgb_to_yuv420_short_data() {
1620        let data = vec![0u8; 10]; // too short for any image
1621        assert!(rgb_to_yuv420(&data, 4, 4).is_err());
1622    }
1623
1624    #[test]
1625    fn test_encode_rgb_produces_valid_frame_tag() {
1626        let encoder = WebPLossyEncoder::new(50);
1627        // 16x16 gray image
1628        let data = vec![128u8; 16 * 16 * 3];
1629        let vp8 = encoder
1630            .encode_rgb(&data, 16, 16)
1631            .expect("encode should succeed");
1632
1633        // Check sync code at bytes 3..6
1634        assert!(vp8.len() >= 10);
1635        assert_eq!(vp8[3], 0x9D);
1636        assert_eq!(vp8[4], 0x01);
1637        assert_eq!(vp8[5], 0x2A);
1638
1639        // Check frame type (keyframe = bit 0 of byte 0 is 0)
1640        assert_eq!(vp8[0] & 0x01, 0, "Should be keyframe");
1641
1642        // Check show_frame (bit 4 of byte 0)
1643        assert_ne!(vp8[0] & 0x10, 0, "show_frame should be set");
1644
1645        // Check dimensions
1646        let w = u16::from(vp8[6]) | (u16::from(vp8[7]) << 8);
1647        let h = u16::from(vp8[8]) | (u16::from(vp8[9]) << 8);
1648        assert_eq!(w & 0x3FFF, 16);
1649        assert_eq!(h & 0x3FFF, 16);
1650    }
1651
1652    #[test]
1653    fn test_encode_rgb_different_qualities() {
1654        let data = vec![100u8; 32 * 32 * 3];
1655
1656        let low = WebPLossyEncoder::new(10);
1657        let high = WebPLossyEncoder::new(90);
1658
1659        let low_data = low.encode_rgb(&data, 32, 32).expect("low quality encode");
1660        let high_data = high.encode_rgb(&data, 32, 32).expect("high quality encode");
1661
1662        // Both should produce valid output
1663        assert!(!low_data.is_empty());
1664        assert!(!high_data.is_empty());
1665    }
1666
1667    #[test]
1668    fn test_encode_rgb_non_mb_aligned() {
1669        // 7x5 image: not aligned to 16x16 macroblock grid
1670        let encoder = WebPLossyEncoder::new(75);
1671        let data = vec![200u8; 7 * 5 * 3];
1672        let vp8 = encoder
1673            .encode_rgb(&data, 7, 5)
1674            .expect("non-aligned encode should succeed");
1675
1676        assert!(!vp8.is_empty());
1677
1678        // Dimensions in bitstream should match original, not padded
1679        let w = u16::from(vp8[6]) | (u16::from(vp8[7]) << 8);
1680        let h = u16::from(vp8[8]) | (u16::from(vp8[9]) << 8);
1681        assert_eq!(w & 0x3FFF, 7);
1682        assert_eq!(h & 0x3FFF, 5);
1683    }
1684
1685    #[test]
1686    fn test_encode_rgba_basic() {
1687        let encoder = WebPLossyEncoder::new(75);
1688        // 4x4 red with 50% alpha
1689        let mut rgba = Vec::with_capacity(4 * 4 * 4);
1690        for _ in 0..16 {
1691            rgba.extend_from_slice(&[255, 0, 0, 128]);
1692        }
1693
1694        let (vp8_data, alpha_data) = encoder
1695            .encode_rgba(&rgba, 4, 4)
1696            .expect("RGBA encode should succeed");
1697
1698        assert!(!vp8_data.is_empty());
1699        assert_eq!(alpha_data.len(), 16);
1700        assert!(alpha_data.iter().all(|&a| a == 128));
1701    }
1702
1703    #[test]
1704    fn test_encode_zero_dimensions() {
1705        let encoder = WebPLossyEncoder::new(50);
1706        assert!(encoder.encode_rgb(&[], 0, 10).is_err());
1707        assert!(encoder.encode_rgb(&[], 10, 0).is_err());
1708    }
1709
1710    #[test]
1711    fn test_encode_too_short_data() {
1712        let encoder = WebPLossyEncoder::new(50);
1713        let data = vec![0u8; 10];
1714        assert!(encoder.encode_rgb(&data, 16, 16).is_err());
1715    }
1716
1717    #[test]
1718    fn test_encode_oversized_dimensions() {
1719        let encoder = WebPLossyEncoder::new(50);
1720        assert!(encoder.encode_rgb(&[], 20000, 100).is_err());
1721    }
1722
1723    #[test]
1724    fn test_idct4x4_simple_dc() {
1725        // DC-only input
1726        let mut coeffs = [0i32; 16];
1727        coeffs[0] = 400;
1728
1729        let output = idct4x4_simple(&coeffs);
1730        // All outputs should be roughly equal (DC distributed)
1731        let avg = output.iter().sum::<i32>() / 16;
1732        for &v in &output {
1733            assert!(
1734                (v - avg).abs() <= 2,
1735                "DC-only IDCT should produce roughly uniform output"
1736            );
1737        }
1738    }
1739
1740    #[test]
1741    fn test_encode_rgb_1x1() {
1742        // Smallest possible image
1743        let encoder = WebPLossyEncoder::new(75);
1744        let data = [128, 128, 128]; // gray pixel
1745        let vp8 = encoder
1746            .encode_rgb(&data, 1, 1)
1747            .expect("1x1 encode should succeed");
1748
1749        assert!(!vp8.is_empty());
1750        // Verify it's a keyframe
1751        assert_eq!(vp8[0] & 0x01, 0);
1752    }
1753
1754    #[test]
1755    fn test_encode_quality_extremes() {
1756        let data = vec![128u8; 16 * 16 * 3];
1757
1758        // Quality 0
1759        let enc0 = WebPLossyEncoder::new(0);
1760        let out0 = enc0.encode_rgb(&data, 16, 16).expect("q0");
1761        assert!(!out0.is_empty());
1762
1763        // Quality 100
1764        let enc100 = WebPLossyEncoder::new(100);
1765        let out100 = enc100.encode_rgb(&data, 16, 16).expect("q100");
1766        assert!(!out100.is_empty());
1767
1768        // Quality > 100 should clamp
1769        let enc200 = WebPLossyEncoder::new(200);
1770        assert_eq!(enc200.quality, 100);
1771    }
1772
1773    #[test]
1774    fn test_encode_rgb_colored_image() {
1775        // Create a simple gradient image
1776        let width = 32u32;
1777        let height = 32u32;
1778        let mut data = Vec::with_capacity((width * height * 3) as usize);
1779        for y in 0..height {
1780            for x in 0..width {
1781                data.push((x * 8) as u8); // R
1782                data.push((y * 8) as u8); // G
1783                data.push(128); // B
1784            }
1785        }
1786
1787        let encoder = WebPLossyEncoder::new(80);
1788        let vp8 = encoder
1789            .encode_rgb(&data, width, height)
1790            .expect("gradient encode should succeed");
1791
1792        // Verify valid VP8 header
1793        assert!(vp8.len() > 10);
1794        assert_eq!(vp8[3], 0x9D);
1795        assert_eq!(vp8[4], 0x01);
1796        assert_eq!(vp8[5], 0x2A);
1797    }
1798
1799    #[test]
1800    fn test_first_partition_size_encoding() {
1801        // The first_partition_size must be correctly encoded in the frame tag
1802        let encoder = WebPLossyEncoder::new(50);
1803        let data = vec![128u8; 16 * 16 * 3];
1804        let vp8 = encoder.encode_rgb(&data, 16, 16).expect("encode");
1805
1806        // Extract first_partition_size from frame tag
1807        let b0 = vp8[0];
1808        let b1 = vp8[1];
1809        let b2 = vp8[2];
1810        let fps = (u32::from(b0 >> 5) & 0x07) | (u32::from(b1) << 3) | (u32::from(b2) << 11);
1811
1812        // The partition should start after the 10-byte header
1813        // and its size should be reasonable
1814        assert!(fps > 0, "first_partition_size should be non-zero");
1815        assert!(
1816            (fps as usize) < vp8.len(),
1817            "first_partition_size ({fps}) should be less than total ({}) ",
1818            vp8.len()
1819        );
1820    }
1821
1822    #[test]
1823    fn test_compute_dc_pred_16x16_no_neighbors() {
1824        let recon = vec![0u8; 16 * 16];
1825        let pred = compute_dc_pred_16x16(&recon, 16, 0, 0);
1826        assert_eq!(pred, 128); // Default when no neighbors available
1827    }
1828
1829    #[test]
1830    fn test_compute_dc_pred_16x16_with_top() {
1831        // Place known values in the row above (mb_y=1, top row is row 15)
1832        let stride = 32;
1833        let mut recon = vec![0u8; stride * 32];
1834        for col in 0..16 {
1835            recon[15 * stride + col] = 200;
1836        }
1837        let pred = compute_dc_pred_16x16(&recon, stride, 0, 1);
1838        assert_eq!(pred, 200);
1839    }
1840
1841    #[test]
1842    fn test_fwht_iwht_roundtrip() {
1843        // Verify forward WHT structural correctness:
1844        // A uniform input should produce a single DC coefficient.
1845        let uniform = [50i32; 16];
1846        let mut wht_coeffs = [0i32; 16];
1847        fwht4x4(&uniform, &mut wht_coeffs);
1848
1849        // DC = sum of all = 50*16 = 800
1850        assert_eq!(wht_coeffs[0], 800);
1851        // All AC should be zero for uniform input
1852        for i in 1..16 {
1853            assert_eq!(wht_coeffs[i], 0, "AC coeff at index {i} should be 0");
1854        }
1855
1856        // Verify that a non-uniform input produces non-zero AC
1857        let varied = [
1858            10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160,
1859        ];
1860        fwht4x4(&varied, &mut wht_coeffs);
1861
1862        // DC should equal sum of all values
1863        let total: i32 = varied.iter().sum();
1864        assert_eq!(wht_coeffs[0], total);
1865
1866        // At least some AC coefficients should be non-zero
1867        let nonzero_ac = wht_coeffs[1..].iter().filter(|&&c| c != 0).count();
1868        assert!(nonzero_ac > 0, "Non-uniform input should have non-zero AC");
1869    }
1870}