gamut-webp 0.2.0

WebP image encoder and decoder (intra-frame VP8/VP8L still images in a RIFF container).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
//! VP8L transforms: reversible pixel-decorrelation passes applied before entropy coding and undone
//! in reverse order on decode (RFC 9649 §3.5).
//!
//! This module owns both the [`Vp8lTransform`] representation (the type + the per-transform data a
//! decoder reads) and the per-pixel math (predictors, color-transform deltas). Forward application
//! lands with the encoder (issue #22 commit series); the inverse application here is what the
//! decoder runs, last-read transform first.

use gamut_color::clip_pixel8;

use crate::vp8l::div_round_up;

/// Transform type code for the 2-bit on-the-wire tag: spatial predictor (RFC 9649 §4.1).
pub const PREDICTOR_TRANSFORM: u8 = 0;
/// Transform type code: color decorrelation (§4.2).
pub const COLOR_TRANSFORM: u8 = 1;
/// Transform type code: subtract-green (§4.3).
pub const SUBTRACT_GREEN_TRANSFORM: u8 = 2;
/// Transform type code: color indexing / palette (§4.4).
pub const COLOR_INDEXING_TRANSFORM: u8 = 3;

/// Number of spatial predictor modes (RFC 9649 §4.1).
pub const NUM_PREDICTOR_MODES: u8 = 14;

/// A VP8L transform together with the data needed to invert it. Up to four transforms may be
/// chained; on decode they are inverted in the reverse of the order they were read (RFC 9649 §3.5).
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Vp8lTransform {
    /// Predictor (spatial) transform: each pixel is predicted from its neighbors using one of 14
    /// modes; `blocks` is the sub-resolution image whose green channel selects the mode per
    /// `2^size_bits` block.
    Predictor {
        /// Block-size exponent (`block edge = 1 << size_bits`).
        size_bits: u8,
        /// Sub-resolution mode image (one ARGB pixel per block; mode = green channel).
        blocks: Vec<u32>,
    },
    /// Color transform: decorrelates red/blue from green per block; `blocks` carries the
    /// `ColorTransformElement` per block.
    Color {
        /// Block-size exponent (`block edge = 1 << size_bits`).
        size_bits: u8,
        /// Sub-resolution element image (one ARGB pixel per block).
        blocks: Vec<u32>,
    },
    /// Subtract-green transform: green is subtracted from red and blue (no associated data).
    SubtractGreen,
    /// Color-indexing (palette) transform: pixels are indices into `table`; `expanded_width` is the
    /// image width before pixel-bundling reduced it.
    ColorIndexing {
        /// The color table (already subtraction-decoded).
        table: Vec<u32>,
        /// Image width before bundling (the width the inverse expands back to).
        expanded_width: u32,
    },
}

impl Vp8lTransform {
    /// Applies this transform's inverse to `pixels` (an ARGB image of `width` × `height`), returning
    /// the image width afterwards (unchanged except for color-indexing, which expands it).
    pub fn apply_inverse(&self, pixels: &mut Vec<u32>, width: u32, height: u32) -> u32 {
        match self {
            Self::SubtractGreen => {
                inverse_subtract_green(pixels);
                width
            }
            Self::Predictor { size_bits, blocks } => {
                inverse_predictor(pixels, width, height, *size_bits, blocks);
                width
            }
            Self::Color { size_bits, blocks } => {
                inverse_color(pixels, width, height, *size_bits, blocks);
                width
            }
            Self::ColorIndexing {
                table,
                expanded_width,
            } => {
                *pixels = inverse_color_indexing(pixels, width, height, *expanded_width, table);
                *expanded_width
            }
        }
    }
}

// --- ARGB channel helpers (shared with the decoder/encoder) ---------------------------------------

/// Extracts the alpha channel of an `0xAARRGGBB` pixel.
#[inline]
#[must_use]
pub const fn alpha(p: u32) -> u8 {
    (p >> 24) as u8
}
/// Extracts the red channel of an `0xAARRGGBB` pixel.
#[inline]
#[must_use]
pub const fn red(p: u32) -> u8 {
    (p >> 16) as u8
}
/// Extracts the green channel of an `0xAARRGGBB` pixel.
#[inline]
#[must_use]
pub const fn green(p: u32) -> u8 {
    (p >> 8) as u8
}
/// Extracts the blue channel of an `0xAARRGGBB` pixel.
#[inline]
#[must_use]
pub const fn blue(p: u32) -> u8 {
    p as u8
}
/// Packs four channels into an `0xAARRGGBB` pixel.
#[inline]
#[must_use]
pub const fn make_argb(a: u8, r: u8, g: u8, b: u8) -> u32 {
    ((a as u32) << 24) | ((r as u32) << 16) | ((g as u32) << 8) | (b as u32)
}

/// Adds two pixels channel-wise modulo 256 (used to subtraction-decode/encode the color table).
#[inline]
#[must_use]
pub const fn add_pixels(a: u32, b: u32) -> u32 {
    make_argb(
        alpha(a).wrapping_add(alpha(b)),
        red(a).wrapping_add(red(b)),
        green(a).wrapping_add(green(b)),
        blue(a).wrapping_add(blue(b)),
    )
}

/// Subtracts two pixels channel-wise modulo 256 (used to subtraction-encode the color table).
#[inline]
#[must_use]
pub const fn subtract_pixels(a: u32, b: u32) -> u32 {
    make_argb(
        alpha(a).wrapping_sub(alpha(b)),
        red(a).wrapping_sub(red(b)),
        green(a).wrapping_sub(green(b)),
        blue(a).wrapping_sub(blue(b)),
    )
}

// --- Subtract-green (RFC 9649 §4.3) ---------------------------------------------------------------

/// Inverse subtract-green: add the green value back into red and blue for every pixel.
pub fn inverse_subtract_green(pixels: &mut [u32]) {
    for p in pixels.iter_mut() {
        let g = green(*p);
        let r = red(*p).wrapping_add(g);
        let b = blue(*p).wrapping_add(g);
        *p = make_argb(alpha(*p), r, g, b);
    }
}

/// Forward subtract-green: subtract the green value from red and blue for every pixel.
pub fn forward_subtract_green(pixels: &mut [u32]) {
    for p in pixels.iter_mut() {
        let g = green(*p);
        let r = red(*p).wrapping_sub(g);
        let b = blue(*p).wrapping_sub(g);
        *p = make_argb(alpha(*p), r, g, b);
    }
}

// --- Predictor transform (RFC 9649 §4.1) ----------------------------------------------------------

#[inline]
fn average2(a: u32, b: u32) -> u32 {
    make_argb(
        ((u16::from(alpha(a)) + u16::from(alpha(b))) / 2) as u8,
        ((u16::from(red(a)) + u16::from(red(b))) / 2) as u8,
        ((u16::from(green(a)) + u16::from(green(b))) / 2) as u8,
        ((u16::from(blue(a)) + u16::from(blue(b))) / 2) as u8,
    )
}

/// The `Select` predictor: returns whichever of `l`/`t` is closer (Manhattan) to `l + t - tl`.
fn select(l: u32, t: u32, tl: u32) -> u32 {
    let pred = |cl: u8, ct: u8, ctl: u8| i32::from(cl) + i32::from(ct) - i32::from(ctl);
    let pa = pred(alpha(l), alpha(t), alpha(tl));
    let pr = pred(red(l), red(t), red(tl));
    let pg = pred(green(l), green(t), green(tl));
    let pb = pred(blue(l), blue(t), blue(tl));
    let dist = |p: u32, a: i32, r: i32, g: i32, b: i32| {
        (a - i32::from(alpha(p))).abs()
            + (r - i32::from(red(p))).abs()
            + (g - i32::from(green(p))).abs()
            + (b - i32::from(blue(p))).abs()
    };
    if dist(l, pa, pr, pg, pb) < dist(t, pa, pr, pg, pb) {
        l
    } else {
        t
    }
}

#[inline]
fn clamp_add_subtract_full(a: u32, b: u32, c: u32) -> u32 {
    let f = |ca: u8, cb: u8, cc: u8| clip_pixel8(i32::from(ca) + i32::from(cb) - i32::from(cc));
    make_argb(
        f(alpha(a), alpha(b), alpha(c)),
        f(red(a), red(b), red(c)),
        f(green(a), green(b), green(c)),
        f(blue(a), blue(b), blue(c)),
    )
}

#[inline]
fn clamp_add_subtract_half(a: u32, b: u32) -> u32 {
    let f = |ca: u8, cb: u8| {
        let ca = i32::from(ca);
        clip_pixel8(ca + (ca - i32::from(cb)) / 2)
    };
    make_argb(
        f(alpha(a), alpha(b)),
        f(red(a), red(b)),
        f(green(a), green(b)),
        f(blue(a), blue(b)),
    )
}

/// Predicts a pixel value from neighbors `l` (left), `t` (top), `tl` (top-left), `tr` (top-right)
/// under one of the 14 predictor modes (RFC 9649 §4.1). Modes out of range fall back to mode 0.
#[must_use]
pub fn predict(mode: u8, l: u32, t: u32, tl: u32, tr: u32) -> u32 {
    match mode {
        0 => 0xff00_0000,
        1 => l,
        2 => t,
        3 => tr,
        4 => tl,
        5 => average2(average2(l, tr), t),
        6 => average2(l, tl),
        7 => average2(l, t),
        8 => average2(tl, t),
        9 => average2(t, tr),
        10 => average2(average2(l, tl), average2(t, tr)),
        11 => select(l, t, tl),
        12 => clamp_add_subtract_full(l, t, tl),
        13 => clamp_add_subtract_half(average2(l, t), tl),
        _ => 0xff00_0000,
    }
}

/// Adds the per-channel residual `res` to a predicted pixel `pred` (mod 256 per channel).
#[inline]
fn add_residual(res: u32, pred: u32) -> u32 {
    make_argb(
        alpha(res).wrapping_add(alpha(pred)),
        red(res).wrapping_add(red(pred)),
        green(res).wrapping_add(green(pred)),
        blue(res).wrapping_add(blue(pred)),
    )
}

/// Subtracts a predicted pixel `pred` from a value `value` (mod 256 per channel) — the forward of
/// [`add_residual`].
#[inline]
fn sub_residual(value: u32, pred: u32) -> u32 {
    make_argb(
        alpha(value).wrapping_sub(alpha(pred)),
        red(value).wrapping_sub(red(pred)),
        green(value).wrapping_sub(green(pred)),
        blue(value).wrapping_sub(blue(pred)),
    )
}

/// The predicted value for pixel `(x, y)` from already-known neighbors in `img` (RFC 9649 §4.1),
/// applied identically by both the forward and inverse passes so they always agree. The border
/// rules (top-left, top row, left column, rightmost-column TR wrap) are handled here; `mode` is used
/// only for interior pixels.
#[inline]
fn predicted_value(img: &[u32], width: usize, x: usize, y: usize, mode: u8) -> u32 {
    let idx = y * width + x;
    if x == 0 && y == 0 {
        0xff00_0000
    } else if y == 0 {
        img[idx - 1] // top row: predict from left
    } else if x == 0 {
        img[idx - width] // left column: predict from top
    } else {
        let l = img[idx - 1];
        let t = img[idx - width];
        let tl = img[idx - width - 1];
        // TR is the pixel above-right, except on the rightmost column where it wraps to the first
        // pixel of the current row (the contiguous-buffer behavior, RFC 9649 §4.1).
        let tr = if x + 1 < width {
            img[idx - width + 1]
        } else {
            img[y * width]
        };
        predict(mode, l, t, tl, tr)
    }
}

/// Looks up the predictor mode for pixel `(x, y)` from the sub-resolution mode image.
#[inline]
fn block_mode(blocks: &[u32], transform_width: usize, size_bits: u8, x: usize, y: usize) -> u8 {
    let block = (y >> size_bits) * transform_width + (x >> size_bits);
    blocks.get(block).map_or(0, |&b| green(b))
}

/// Inverse predictor transform: reconstructs `pixels` (holding residuals) in scan order, adding back
/// each pixel's predicted value. `blocks` is the sub-resolution mode image (RFC 9649 §4.1).
pub fn inverse_predictor(
    pixels: &mut [u32],
    width: u32,
    height: u32,
    size_bits: u8,
    blocks: &[u32],
) {
    if width == 0 || height == 0 {
        return;
    }
    let w = width as usize;
    let transform_width = div_round_up(width, 1 << size_bits) as usize;
    for y in 0..height as usize {
        for x in 0..w {
            let mode = block_mode(blocks, transform_width, size_bits, x, y);
            let pred = predicted_value(pixels, w, x, y, mode);
            let idx = y * w + x;
            pixels[idx] = add_residual(pixels[idx], pred);
        }
    }
}

/// Forward predictor transform: chooses a per-block predictor mode and replaces each pixel with its
/// residual `pixel - prediction`. Returns `(residuals, mode_sub_image)`; the sub-image's green
/// channel carries the chosen mode per block (RFC 9649 §4.1). Mode selection minimizes the summed
/// residual magnitude — a simple heuristic; optimal selection is deferred to issue #31.
#[must_use]
pub fn forward_predictor(
    pixels: &[u32],
    width: u32,
    height: u32,
    size_bits: u8,
) -> (Vec<u32>, Vec<u32>) {
    let w = width as usize;
    let h = height as usize;
    let transform_width = div_round_up(width, 1 << size_bits) as usize;
    let transform_height = div_round_up(height, 1 << size_bits) as usize;

    let mut sub_image = vec![0xff00_0000u32; transform_width * transform_height];
    for by in 0..transform_height {
        for bx in 0..transform_width {
            let mode = best_predictor_mode(pixels, w, h, size_bits, bx, by);
            sub_image[by * transform_width + bx] = make_argb(0xff, 0, mode, 0);
        }
    }

    let mut residuals = vec![0u32; pixels.len()];
    for y in 0..h {
        for x in 0..w {
            let mode = block_mode(&sub_image, transform_width, size_bits, x, y);
            let pred = predicted_value(pixels, w, x, y, mode);
            let idx = y * w + x;
            residuals[idx] = sub_residual(pixels[idx], pred);
        }
    }
    (residuals, sub_image)
}

/// Channel-wise "distance from zero modulo 256" — a cheap proxy for residual entropy.
#[inline]
fn residual_cost(res: u32) -> u64 {
    let c = |v: u8| u64::from(v.min(v.wrapping_neg()));
    c(alpha(res)) + c(red(res)) + c(green(res)) + c(blue(res))
}

/// Picks the predictor mode that minimizes the summed residual magnitude over a block.
fn best_predictor_mode(
    pixels: &[u32],
    width: usize,
    height: usize,
    size_bits: u8,
    bx: usize,
    by: usize,
) -> u8 {
    let block_size = 1usize << size_bits;
    let x0 = bx * block_size;
    let y0 = by * block_size;
    let mut best_mode = 0u8;
    let mut best_cost = u64::MAX;
    for mode in 0..NUM_PREDICTOR_MODES {
        let mut cost = 0u64;
        for y in y0..(y0 + block_size).min(height) {
            for x in x0..(x0 + block_size).min(width) {
                let pred = predicted_value(pixels, width, x, y, mode);
                cost += residual_cost(sub_residual(pixels[y * width + x], pred));
            }
        }
        if cost < best_cost {
            best_cost = cost;
            best_mode = mode;
        }
    }
    best_mode
}

// --- Color transform (RFC 9649 §4.2) --------------------------------------------------------------

/// The color-transform delta: a 3.5 fixed-point multiply of a transform coefficient `t` and a signed
/// color channel `c`, keeping only the arithmetic-shifted product (RFC 9649 §4.2).
#[inline]
#[must_use]
pub fn color_transform_delta(t: i8, c: i8) -> i32 {
    (i32::from(t) * i32::from(c)) >> 5
}

/// Inverse color transform: adds the per-block color deltas back into red and blue. `blocks` carries
/// one `ColorTransformElement` per block, packed as `A=255, R=red_to_blue, G=green_to_blue,
/// B=green_to_red` (RFC 9649 §4.2).
pub fn inverse_color(pixels: &mut [u32], width: u32, height: u32, size_bits: u8, blocks: &[u32]) {
    if width == 0 || height == 0 {
        return;
    }
    let w = width as usize;
    let transform_width = div_round_up(width, 1 << size_bits) as usize;
    for y in 0..height as usize {
        for x in 0..w {
            let idx = y * w + x;
            let p = pixels[idx];
            let block = (y >> size_bits) * transform_width + (x >> size_bits);
            let cte = blocks.get(block).copied().unwrap_or(0xff00_0000);
            let green_to_red = blue(cte) as i8;
            let green_to_blue = green(cte) as i8;
            let red_to_blue = red(cte) as i8;
            let g = green(p);
            let mut tmp_red = i32::from(red(p));
            let mut tmp_blue = i32::from(blue(p));
            tmp_red += color_transform_delta(green_to_red, g as i8);
            tmp_blue += color_transform_delta(green_to_blue, g as i8);
            tmp_blue += color_transform_delta(red_to_blue, (tmp_red & 0xff) as i8);
            pixels[idx] = make_argb(alpha(p), (tmp_red & 0xff) as u8, g, (tmp_blue & 0xff) as u8);
        }
    }
}

/// Packs a `ColorTransformElement` into a sub-image pixel as the spec prescribes: `A=255,
/// R=red_to_blue, G=green_to_blue, B=green_to_red` (RFC 9649 §4.2).
#[inline]
#[must_use]
fn pack_color_element(green_to_red: i8, green_to_blue: i8, red_to_blue: i8) -> u32 {
    make_argb(
        0xff,
        red_to_blue as u8,
        green_to_blue as u8,
        green_to_red as u8,
    )
}

/// Forward color transform: subtracts the per-block color deltas from red and blue. Returns the
/// sub-resolution element image. The forward uses the **original** red for the `red_to_blue` term,
/// matching the inverse which adds it back using the reconstructed (== original) red (RFC 9649
/// §4.2). Per-block elements are estimated by simple least squares; optimal selection is deferred to
/// issue #31.
#[must_use]
pub fn forward_color(pixels: &mut [u32], width: u32, height: u32, size_bits: u8) -> Vec<u32> {
    let w = width as usize;
    let h = height as usize;
    let transform_width = div_round_up(width, 1 << size_bits) as usize;
    let transform_height = div_round_up(height, 1 << size_bits) as usize;
    let block_size = 1usize << size_bits;

    let mut sub_image = vec![pack_color_element(0, 0, 0); transform_width * transform_height];
    for by in 0..transform_height {
        for bx in 0..transform_width {
            let (g2r, g2b, r2b) = estimate_color_element(pixels, w, h, block_size, bx, by);
            sub_image[by * transform_width + bx] = pack_color_element(g2r, g2b, r2b);
            for y in (by * block_size)..((by + 1) * block_size).min(h) {
                for x in (bx * block_size)..((bx + 1) * block_size).min(w) {
                    let p = pixels[y * w + x];
                    let g = green(p) as i8;
                    let orig_red = red(p);
                    let new_red = (i32::from(orig_red) - color_transform_delta(g2r, g)) & 0xff;
                    let new_blue = (i32::from(blue(p))
                        - color_transform_delta(g2b, g)
                        - color_transform_delta(r2b, orig_red as i8))
                        & 0xff;
                    pixels[y * w + x] =
                        make_argb(alpha(p), new_red as u8, green(p), new_blue as u8);
                }
            }
        }
    }
    sub_image
}

/// Estimates the three color-transform coefficients for one block by least squares against the
/// channel pairs `(red, green)`, `(blue, green)`, and `(blue, red)`.
fn estimate_color_element(
    pixels: &[u32],
    width: usize,
    height: usize,
    block_size: usize,
    bx: usize,
    by: usize,
) -> (i8, i8, i8) {
    let mut rg = 0i64; // sum(red * green)
    let mut gg = 0i64; // sum(green * green)
    let mut bg = 0i64; // sum(blue * green)
    let mut br = 0i64; // sum(blue * red)
    let mut rr = 0i64; // sum(red * red)
    for y in (by * block_size)..((by + 1) * block_size).min(height) {
        for x in (bx * block_size)..((bx + 1) * block_size).min(width) {
            let p = pixels[y * width + x];
            let r = i64::from(red(p) as i8);
            let g = i64::from(green(p) as i8);
            let b = i64::from(blue(p) as i8);
            rg += r * g;
            gg += g * g;
            bg += b * g;
            br += b * r;
            rr += r * r;
        }
    }
    // delta(t, c) ≈ t * c / 32, so t ≈ 32 * sum(target * basis) / sum(basis^2).
    let coeff = |num: i64, den: i64| -> i8 {
        if den == 0 {
            0
        } else {
            ((num * 32) / den).clamp(-128, 127) as i8
        }
    };
    (coeff(rg, gg), coeff(bg, gg), coeff(br, rr))
}

// --- Color indexing (RFC 9649 §4.4) ---------------------------------------------------------------

/// The pixel-bundling exponent for a color table of `table_size` entries: how many pixels are packed
/// into one (RFC 9649 §4.4).
#[must_use]
pub fn color_index_width_bits(table_size: usize) -> u8 {
    if table_size <= 2 {
        3
    } else if table_size <= 4 {
        2
    } else if table_size <= 16 {
        1
    } else {
        0
    }
}

/// Forward color-indexing: maps each pixel to its `palette` index and packs the indices into the
/// green channel (pixel bundling), returning `(bundled_image, bundled_width)` (RFC 9649 §4.4).
/// Callers must guarantee every pixel appears in `palette`.
#[must_use]
pub fn forward_color_indexing(
    pixels: &[u32],
    width: u32,
    height: u32,
    palette: &[u32],
) -> (Vec<u32>, u32) {
    use std::collections::HashMap;
    let width_bits = color_index_width_bits(palette.len());
    let pixels_per = 1usize << width_bits;
    let bits_per = 8 / pixels_per as u32;
    let bundled_width = div_round_up(width, pixels_per as u32);
    let index_of: HashMap<u32, u32> = palette
        .iter()
        .enumerate()
        .map(|(i, &c)| (c, i as u32))
        .collect();
    let w = width as usize;
    let bw = bundled_width as usize;
    let mut bundled = vec![0xff00_0000u32; bw * height as usize];
    for y in 0..height as usize {
        for x in 0..w {
            let index = index_of.get(&pixels[y * w + x]).copied().unwrap_or(0);
            let pos = y * bw + x / pixels_per;
            let shift = (x % pixels_per) as u32 * bits_per;
            let packed = u32::from(green(bundled[pos])) | (index << shift);
            bundled[pos] = make_argb(0xff, 0, packed as u8, 0);
        }
    }
    (bundled, bundled_width)
}

/// Inverse color-indexing: expands the bundled index image (`bundled_width` × `height`) back to
/// `expanded_width` columns, replacing each green-channel index with its `table` color (or
/// transparent black when the index is out of range) (RFC 9649 §4.4).
#[must_use]
pub fn inverse_color_indexing(
    bundled: &[u32],
    bundled_width: u32,
    height: u32,
    expanded_width: u32,
    table: &[u32],
) -> Vec<u32> {
    let width_bits = color_index_width_bits(table.len());
    let bw = bundled_width as usize;
    let ew = expanded_width as usize;
    let mut out = vec![0u32; ew * height as usize];
    let pixels_per = 1usize << width_bits; // 1, 2, 4, or 8
    let bits_per = 8 / pixels_per; // 8, 4, 2, or 1
    let mask = (1u32 << bits_per) - 1;
    for y in 0..height as usize {
        for x in 0..ew {
            let index = if width_bits == 0 {
                u32::from(green(bundled[y * bw + x]))
            } else {
                let packed = u32::from(green(bundled[y * bw + x / pixels_per]));
                (packed >> ((x % pixels_per) * bits_per)) & mask
            };
            out[y * ew + x] = table.get(index as usize).copied().unwrap_or(0);
        }
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn subtract_green_inverse_adds_green() {
        // green=10; red stored as (50-10)=40, blue stored as (200-10)=190 → reconstruct 50/200.
        let mut px = [make_argb(0xff, 40, 10, 190)];
        inverse_subtract_green(&mut px);
        assert_eq!(px[0], make_argb(0xff, 50, 10, 200));
        // wraps mod 256
        let mut wrap = [make_argb(0, 250, 10, 0)];
        inverse_subtract_green(&mut wrap);
        assert_eq!(red(wrap[0]), 250u8.wrapping_add(10));
    }

    #[test]
    fn predict_basic_modes() {
        let l = make_argb(1, 2, 3, 4);
        let t = make_argb(5, 6, 7, 8);
        let tl = make_argb(9, 10, 11, 12);
        let tr = make_argb(13, 14, 15, 16);
        assert_eq!(predict(0, l, t, tl, tr), 0xff00_0000);
        assert_eq!(predict(1, l, t, tl, tr), l);
        assert_eq!(predict(2, l, t, tl, tr), t);
        assert_eq!(predict(3, l, t, tl, tr), tr);
        assert_eq!(predict(4, l, t, tl, tr), tl);
        assert_eq!(predict(7, l, t, tl, tr), average2(l, t));
        // Out-of-range mode falls back to opaque black.
        assert_eq!(predict(99, l, t, tl, tr), 0xff00_0000);
    }

    #[test]
    fn average2_rounds_down_per_channel() {
        let a = make_argb(10, 20, 30, 41);
        let b = make_argb(11, 21, 31, 40);
        assert_eq!(average2(a, b), make_argb(10, 20, 30, 40));
    }

    #[test]
    fn select_picks_closer_neighbor() {
        // When L equals the L+T-TL estimate exactly, Select returns L.
        let l = make_argb(100, 100, 100, 100);
        let t = make_argb(50, 50, 50, 50);
        let tl = make_argb(50, 50, 50, 50);
        // estimate = L + T - TL = L, so distance to L is 0 → returns L.
        assert_eq!(select(l, t, tl), l);
    }

    #[test]
    fn clamp_predictors_saturate() {
        let a = make_argb(200, 200, 200, 200);
        let b = make_argb(200, 200, 200, 200);
        let c = make_argb(0, 0, 0, 0);
        // 200 + 200 - 0 = 400 → clamps to 255 per channel.
        assert_eq!(
            clamp_add_subtract_full(a, b, c),
            make_argb(255, 255, 255, 255)
        );
        // half: 200 + (200-0)/2 = 300 → 255.
        assert_eq!(clamp_add_subtract_half(a, c), make_argb(255, 255, 255, 255));
    }

    #[test]
    fn color_transform_delta_sign_extends() {
        // t = -1 (0xff as i8), c = 64 → (-1 * 64) >> 5 = -2.
        assert_eq!(color_transform_delta(-1, 64), -2);
        assert_eq!(color_transform_delta(0, 100), 0);
        assert_eq!(color_transform_delta(32, 64), (32 * 64) >> 5);
    }

    /// A test-local forward predictor mirroring `inverse_predictor`, to validate the inverse end to
    /// end (scan order + every border rule) without depending on the encoder.
    fn forward_predictor_fixed_mode(
        orig: &[u32],
        width: usize,
        height: usize,
        mode: u8,
    ) -> Vec<u32> {
        let mut res = vec![0u32; orig.len()];
        for y in 0..height {
            for x in 0..width {
                let idx = y * width + x;
                let pred = if x == 0 && y == 0 {
                    0xff00_0000
                } else if y == 0 {
                    orig[idx - 1]
                } else if x == 0 {
                    orig[idx - width]
                } else {
                    let tr = if x + 1 < width {
                        orig[idx - width + 1]
                    } else {
                        orig[y * width]
                    };
                    predict(
                        mode,
                        orig[idx - 1],
                        orig[idx - width],
                        orig[idx - width - 1],
                        tr,
                    )
                };
                res[idx] = make_argb(
                    alpha(orig[idx]).wrapping_sub(alpha(pred)),
                    red(orig[idx]).wrapping_sub(red(pred)),
                    green(orig[idx]).wrapping_sub(green(pred)),
                    blue(orig[idx]).wrapping_sub(blue(pred)),
                );
            }
        }
        res
    }

    #[test]
    fn predictor_round_trips_every_mode_with_borders() {
        let (w, h) = (5usize, 4usize);
        let orig: Vec<u32> = (0..(w * h))
            .map(|i| {
                make_argb(
                    (i * 3) as u8,
                    (i * 7 + 1) as u8,
                    (i ^ 0x5a) as u8,
                    (i * 13) as u8,
                )
            })
            .collect();
        for mode in 0..NUM_PREDICTOR_MODES {
            // One block covering the whole image: size_bits large enough that transform_width == 1.
            let blocks = vec![make_argb(0xff, 0, mode, 0)];
            let mut res = forward_predictor_fixed_mode(&orig, w, h, mode);
            inverse_predictor(&mut res, w as u32, h as u32, 3, &blocks);
            assert_eq!(res, orig, "mode {mode} failed to round-trip");
        }
    }

    #[test]
    fn color_transform_round_trips() {
        let (w, h) = (3usize, 2usize);
        let orig: Vec<u32> = (0..(w * h))
            .map(|i| make_argb(0xff, (i * 17) as u8, (i * 5 + 3) as u8, (i * 29) as u8))
            .collect();
        let elem = make_argb(0xff, 7, 200, 30); // red_to_blue=7, green_to_blue=200, green_to_red=30
        let blocks = vec![elem];
        // Forward: subtract the same deltas the inverse adds.
        let mut transformed = orig.clone();
        for p in transformed.iter_mut() {
            let g = green(*p);
            let green_to_red = blue(elem) as i8;
            let green_to_blue = green(elem) as i8;
            let red_to_blue = red(elem) as i8;
            // Forward subtracts deltas; the red_to_blue term uses the ORIGINAL red (the inverse
            // adds it back using the reconstructed red, which equals the original).
            let orig_red = red(*p);
            let new_red =
                (i32::from(orig_red) - color_transform_delta(green_to_red, g as i8)) & 0xff;
            let new_blue = (i32::from(blue(*p))
                - color_transform_delta(green_to_blue, g as i8)
                - color_transform_delta(red_to_blue, orig_red as i8))
                & 0xff;
            *p = make_argb(alpha(*p), new_red as u8, g, new_blue as u8);
        }
        inverse_color(&mut transformed, w as u32, h as u32, 3, &blocks);
        assert_eq!(transformed, orig);
    }

    #[test]
    fn color_indexing_unbundles() {
        // width_bits = 0 (no bundling) needs a table larger than 16 entries.
        assert_eq!(color_index_width_bits(32), 0);
        let table: Vec<u32> = (0..32)
            .map(|i| make_argb(0xff, i as u8, i as u8, 0))
            .collect();
        let bundled: Vec<u32> = [0u8, 31, 1, 2]
            .iter()
            .map(|&i| make_argb(0xff, 0, i, 0))
            .collect();
        let out = inverse_color_indexing(&bundled, 4, 1, 4, &table);
        assert_eq!(out, vec![table[0], table[31], table[1], table[2]]);

        // Out-of-range index (>= table size) → transparent black.
        let oob = vec![make_argb(0xff, 0, 200, 0)];
        let out2 = inverse_color_indexing(&oob, 1, 1, 1, &table);
        assert_eq!(out2, vec![0]);
    }

    #[test]
    fn color_indexing_bundled_width_bits() {
        // 2-color table → width_bits = 3 → 8 indices packed per pixel (1 bit each), low bit first.
        assert_eq!(color_index_width_bits(2), 3);
        let table = vec![0xff00_0000, 0xffff_ffff];
        // packed green = 0b1010_1010 → indices for x=0..8: 0,1,0,1,0,1,0,1
        let bundled = vec![make_argb(0xff, 0, 0b1010_1010, 0)];
        let out = inverse_color_indexing(&bundled, 1, 1, 8, &table);
        let expected: Vec<u32> = (0..8)
            .map(|x| if x % 2 == 1 { table[1] } else { table[0] })
            .collect();
        assert_eq!(out, expected);

        // 16-color table → width_bits = 1 → 2 nibble indices per pixel.
        assert_eq!(color_index_width_bits(16), 1);
        let table16: Vec<u32> = (0..16).map(|i| make_argb(0xff, 0, 0, i as u8)).collect();
        // green = 0x53 → low nibble 3 (x=0), high nibble 5 (x=1)
        let bundled16 = vec![make_argb(0xff, 0, 0x53, 0)];
        let out16 = inverse_color_indexing(&bundled16, 1, 1, 2, &table16);
        assert_eq!(out16, vec![table16[3], table16[5]]);
    }

    #[test]
    fn forward_subtract_green_inverts() {
        let mut px: Vec<u32> = (0..20)
            .map(|i| make_argb(i as u8, (i * 3) as u8, (i * 7 + 1) as u8, (i * 11) as u8))
            .collect();
        let orig = px.clone();
        forward_subtract_green(&mut px);
        inverse_subtract_green(&mut px);
        assert_eq!(px, orig);
    }

    #[test]
    fn forward_predictor_inverts() {
        let (w, h) = (6u32, 4u32);
        let orig: Vec<u32> = (0..(w * h))
            .map(|i| make_argb(0xff, (i * 3) as u8, (i ^ 0x2a) as u8, (i * 5) as u8))
            .collect();
        let (residual, sub) = forward_predictor(&orig, w, h, 2);
        let mut recon = residual;
        inverse_predictor(&mut recon, w, h, 2, &sub);
        assert_eq!(recon, orig);
    }

    #[test]
    fn forward_color_inverts() {
        let (w, h) = (5u32, 3u32);
        let orig: Vec<u32> = (0..(w * h))
            .map(|i| make_argb(0xff, (i * 9) as u8, (i * 5 + 3) as u8, (i * 29) as u8))
            .collect();
        let mut px = orig.clone();
        let sub = forward_color(&mut px, w, h, 2);
        inverse_color(&mut px, w, h, 2, &sub);
        assert_eq!(px, orig);
    }

    #[test]
    fn forward_color_indexing_inverts() {
        // 3-color palette -> width_bits 2 -> 4 indices packed per pixel.
        let palette = vec![
            make_argb(0xff, 10, 20, 30),
            make_argb(0x80, 40, 50, 60),
            make_argb(0xff, 0, 0, 0),
        ];
        let (w, h) = (5u32, 2u32);
        let orig: Vec<u32> = (0..(w * h)).map(|i| palette[(i % 3) as usize]).collect();
        let (bundled, bundled_width) = forward_color_indexing(&orig, w, h, &palette);
        assert_eq!(bundled_width, 2); // div_round_up(5, 4)
        let restored = inverse_color_indexing(&bundled, bundled_width, h, w, &palette);
        assert_eq!(restored, orig);
    }

    #[test]
    fn forward_color_indexing_two_color_bundles_eight() {
        let palette = vec![make_argb(0xff, 1, 2, 3), make_argb(0xff, 9, 9, 9)];
        let (w, h) = (10u32, 1u32);
        let orig: Vec<u32> = (0..w).map(|i| palette[(i % 2) as usize]).collect();
        let (bundled, bundled_width) = forward_color_indexing(&orig, w, h, &palette);
        assert_eq!(bundled_width, 2); // div_round_up(10, 8)
        let restored = inverse_color_indexing(&bundled, bundled_width, h, w, &palette);
        assert_eq!(restored, orig);
    }

    #[test]
    fn apply_inverse_dispatches_and_expands_width() {
        let table = vec![0xff00_0000, 0xffaa_bbcc];
        let mut pixels = vec![make_argb(0xff, 0, 0b0000_0001, 0)]; // 8 bundled bits: 1,0,0,0,0,0,0,0
        let t = Vp8lTransform::ColorIndexing {
            table: table.clone(),
            expanded_width: 8,
        };
        let new_width = t.apply_inverse(&mut pixels, 1, 1);
        assert_eq!(new_width, 8);
        assert_eq!(pixels.len(), 8);
        assert_eq!(pixels[0], table[1]);
        assert_eq!(pixels[1], table[0]);
    }
}