use crate::error::{Result, WebpError as Error};
use super::VP8L_SIGNATURE;
const MAX_CODE_LENGTH: u8 = 15;
const LZ_WINDOW: usize = 16384;
const LZ_MAX_TRIES: usize = 256;
const MIN_MATCH: usize = 3;
const MAX_MATCH: usize = 4096;
const COLOR_CACHE_BITS: u32 = 8;
const PREDICTOR_TILE_BITS: u32 = 4;
const PREDICTOR_TILE_BITS_SWEEP: &[u32] = &[3, 4, 5];
const COLOR_TRANSFORM_TILE_BITS: u32 = 5;
const COLOR_COEFF_GRID: [i8; 16] = [
-24, -21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21,
];
const COLOR_R2B_GRID: [i8; 5] = [-12, -6, 0, 6, 12];
const PREDICTOR_MODES: &[u32] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
struct BitWriter {
out: Vec<u8>,
cur: u64,
nbits: u32,
}
#[derive(Clone, Copy)]
struct BitWriterMark {
out_len: usize,
cur: u64,
nbits: u32,
}
impl BitWriter {
fn new() -> Self {
Self {
out: Vec::new(),
cur: 0,
nbits: 0,
}
}
fn write(&mut self, value: u32, n: u32) {
debug_assert!(n <= 32);
let mask = if n == 0 {
0u64
} else if n == 32 {
0xFFFF_FFFFu64
} else {
(1u64 << n) - 1
};
self.cur |= ((value as u64) & mask) << self.nbits;
self.nbits += n;
while self.nbits >= 8 {
self.out.push((self.cur & 0xff) as u8);
self.cur >>= 8;
self.nbits -= 8;
}
}
fn mark(&self) -> BitWriterMark {
BitWriterMark {
out_len: self.out.len(),
cur: self.cur,
nbits: self.nbits,
}
}
fn restore(&mut self, mark: BitWriterMark) {
self.out.truncate(mark.out_len);
self.cur = mark.cur;
self.nbits = mark.nbits;
}
fn bit_pos(&self) -> u64 {
(self.out.len() as u64) * 8 + self.nbits as u64
}
fn finish(mut self) -> Vec<u8> {
if self.nbits > 0 {
self.out.push((self.cur & 0xff) as u8);
}
self.out
}
}
impl BitWriterMark {
fn bit_pos(&self) -> u64 {
(self.out_len as u64) * 8 + self.nbits as u64
}
}
pub fn encode_vp8l_argb(
width: u32,
height: u32,
pixels: &[u32],
has_alpha: bool,
) -> Result<Vec<u8>> {
encode_vp8l_argb_rdo(width, height, pixels, has_alpha)
}
fn strip_transparent_rgb(pixels: &mut [u32]) {
for p in pixels.iter_mut() {
if (*p >> 24) & 0xff == 0 {
*p &= 0xff00_0000;
}
}
}
fn near_lossless_shift(level: u8) -> Option<u32> {
match level {
100..=u8::MAX => None,
60..=99 => Some(1),
40..=59 => Some(2),
20..=39 => Some(3),
0..=19 => Some(4),
}
}
fn apply_near_lossless(pixels: &mut [u32], shift: u32) {
debug_assert!((1..=7).contains(&shift));
let step = 1u32 << shift;
let bias = step >> 1;
let mask = !(step - 1);
for p in pixels.iter_mut() {
let a = (*p >> 24) & 0xff;
let r = (*p >> 16) & 0xff;
let g = (*p >> 8) & 0xff;
let b = *p & 0xff;
let nr = quantise_channel(r, bias, mask);
let ng = quantise_channel(g, bias, mask);
let nb = quantise_channel(b, bias, mask);
*p = (a << 24) | (nr << 16) | (ng << 8) | nb;
}
}
fn apply_near_lossless_smoothing(
pixels: &mut [u32],
original: &[u32],
width: u32,
height: u32,
shift: u32,
) {
let w = width as usize;
let h = height as usize;
if w < 3 || h < 3 {
return;
}
debug_assert!((1..=7).contains(&shift));
debug_assert_eq!(pixels.len(), original.len());
const MAJORITY: u32 = 6;
let step = 1u32 << shift;
let snapshot: Vec<u32> = pixels.to_vec();
for y in 1..h - 1 {
for x in 1..w - 1 {
let centre = snapshot[y * w + x];
let mut counts: [(u32, u32); 9] = [(0, 0); 9];
let mut n_distinct = 0usize;
for dy in 0..3 {
for dx in 0..3 {
let p = snapshot[(y + dy - 1) * w + (x + dx - 1)];
let mut found = false;
for slot in counts.iter_mut().take(n_distinct) {
if slot.0 == p {
slot.1 += 1;
found = true;
break;
}
}
if !found {
counts[n_distinct] = (p, 1);
n_distinct += 1;
}
}
}
let mut best = (0u32, 0u32);
for slot in counts.iter().take(n_distinct) {
if slot.1 > best.1 {
best = *slot;
}
}
if best.1 < MAJORITY || centre == best.0 {
continue;
}
let orig = original[y * w + x];
let oa = (orig >> 24) & 0xff;
let ba = (best.0 >> 24) & 0xff;
if oa != ba {
continue;
}
let or = (orig >> 16) & 0xff;
let og = (orig >> 8) & 0xff;
let ob = orig & 0xff;
let br = (best.0 >> 16) & 0xff;
let bg = (best.0 >> 8) & 0xff;
let bb = best.0 & 0xff;
if or.abs_diff(br) <= step && og.abs_diff(bg) <= step && ob.abs_diff(bb) <= step {
pixels[y * w + x] = best.0;
}
}
}
}
#[inline]
fn quantise_channel(v: u32, bias: u32, mask: u32) -> u32 {
let raised = (v + bias).min(255);
raised & mask
}
#[doc(hidden)]
#[derive(Clone, Copy)]
pub struct EncoderOptions {
pub use_subtract_green: bool,
pub use_color_transform: bool,
pub use_predictor: bool,
pub use_color_cache: bool,
pub cache_bits: u32,
pub strip_transparent_color: bool,
pub use_color_index: bool,
pub near_lossless: u8,
pub predictor_tile_bits: u32,
}
impl Default for EncoderOptions {
fn default() -> Self {
Self {
use_subtract_green: true,
use_color_transform: true,
use_predictor: true,
use_color_cache: true,
cache_bits: COLOR_CACHE_BITS,
strip_transparent_color: true,
use_color_index: true,
near_lossless: 100,
predictor_tile_bits: PREDICTOR_TILE_BITS,
}
}
}
impl EncoderOptions {
#[doc(hidden)]
pub fn bare() -> Self {
Self {
use_subtract_green: false,
use_color_transform: false,
use_predictor: false,
use_color_cache: false,
cache_bits: COLOR_CACHE_BITS,
strip_transparent_color: true,
use_color_index: false,
near_lossless: 100,
predictor_tile_bits: PREDICTOR_TILE_BITS,
}
}
#[doc(hidden)]
pub fn subtract_green_only() -> Self {
Self {
use_subtract_green: true,
use_color_transform: false,
use_predictor: false,
use_color_cache: false,
cache_bits: COLOR_CACHE_BITS,
strip_transparent_color: true,
use_color_index: false,
near_lossless: 100,
predictor_tile_bits: PREDICTOR_TILE_BITS,
}
}
}
#[doc(hidden)]
pub fn encode_vp8l_argb_with(
width: u32,
height: u32,
pixels: &[u32],
has_alpha: bool,
opts: EncoderOptions,
) -> Result<Vec<u8>> {
if width == 0 || height == 0 {
return Err(Error::invalid("VP8L encoder: zero-size image"));
}
if width > 16384 || height > 16384 {
return Err(Error::invalid("VP8L encoder: max dimension 16384"));
}
if (pixels.len() as u64) != (width as u64) * (height as u64) {
return Err(Error::invalid("VP8L encoder: pixel count mismatch"));
}
let mut bw = BitWriter::new();
bw.write(VP8L_SIGNATURE as u32, 8);
bw.write(width - 1, 14);
bw.write(height - 1, 14);
bw.write(if has_alpha { 1 } else { 0 }, 1);
bw.write(0, 3);
let mut working = pixels.to_vec();
if opts.strip_transparent_color {
strip_transparent_rgb(&mut working);
}
if let Some(shift) = near_lossless_shift(opts.near_lossless) {
let original = working.clone();
apply_near_lossless(&mut working, shift);
apply_near_lossless_smoothing(&mut working, &original, width, height, shift);
}
let mut current_width = width;
let mut palette_active = false;
if opts.use_color_index {
if let Some(palette) = build_palette(&working) {
palette_active = true;
let bits_per_pixel = bits_per_pixel_for(palette.len() as u32);
let pack = 8u32 / bits_per_pixel;
let packed_w = (width + pack - 1) / pack;
bw.write(1, 1);
bw.write(3, 2);
bw.write(palette.len() as u32 - 1, 8);
let palette_delta = delta_encode_palette(&palette);
encode_image_stream(&mut bw, &palette_delta, palette.len() as u32, 1, false, 0)?;
working =
pack_palette_indices(&working, width, height, &palette, bits_per_pixel, packed_w);
current_width = packed_w;
}
}
if !palette_active && opts.use_subtract_green {
bw.write(1, 1);
bw.write(2, 2);
apply_subtract_green_forward(&mut working);
}
if !palette_active && opts.use_color_transform {
let tile_bits = COLOR_TRANSFORM_TILE_BITS;
let tile_side = 1u32 << tile_bits;
let sub_w = (width + tile_side - 1) / tile_side;
let sub_h = (height + tile_side - 1) / tile_side;
let coeffs = choose_color_transform(&working, width, height, tile_bits, sub_w, sub_h);
bw.write(1, 1);
bw.write(1, 2);
bw.write(tile_bits - 2, 3);
let sub_pixels: Vec<u32> = coeffs
.iter()
.map(|c| {
let g2r = (c.g2r as u8) as u32;
let g2b = (c.g2b as u8) as u32;
let r2b = (c.r2b as u8) as u32;
0xff00_0000 | (r2b << 16) | (g2b << 8) | g2r
})
.collect();
encode_image_stream(&mut bw, &sub_pixels, sub_w, sub_h, false, 0)?;
working = apply_color_transform_forward(&working, width, height, tile_bits, &coeffs, sub_w);
}
if !palette_active && opts.use_predictor {
let tile_bits = if (2..=9).contains(&opts.predictor_tile_bits) {
opts.predictor_tile_bits
} else {
PREDICTOR_TILE_BITS
};
let tile_side = 1u32 << tile_bits;
let sub_w = (width + tile_side - 1) / tile_side;
let sub_h = (height + tile_side - 1) / tile_side;
let modes = choose_predictor_modes(&working, width, height, tile_bits, sub_w, sub_h);
bw.write(1, 1);
bw.write(0, 2);
bw.write(tile_bits - 2, 3);
let sub_pixels: Vec<u32> = modes
.iter()
.map(|&m| 0xff00_0000 | ((m & 0xff) << 8))
.collect();
encode_image_stream(&mut bw, &sub_pixels, sub_w, sub_h, false, 0)?;
working = apply_predictor_forward(&working, width, height, tile_bits, &modes, sub_w);
}
bw.write(0, 1);
let cache_bits = if opts.use_color_cache {
if (1..=11).contains(&opts.cache_bits) {
opts.cache_bits
} else {
COLOR_CACHE_BITS
}
} else {
0
};
let stream_h = height;
encode_image_stream(&mut bw, &working, current_width, stream_h, true, cache_bits)?;
Ok(bw.finish())
}
fn encode_vp8l_argb_rdo(
width: u32,
height: u32,
pixels: &[u32],
has_alpha: bool,
) -> Result<Vec<u8>> {
const CACHE_BITS_GRID: [u32; 8] = [0, 4, 6, 7, 8, 9, 10, 11];
let mut stripped: Vec<u32> = pixels.to_vec();
strip_transparent_rgb(&mut stripped);
let mut best_palette: Option<Vec<u8>> = None;
let mut best_non_palette: Option<Vec<u8>> = None;
for &use_palette in &[true, false] {
for &use_sg in &[true, false] {
for &use_ct in &[true, false] {
for &use_pr in &[true, false] {
for &cb in CACHE_BITS_GRID.iter() {
let pred_tile_bits_iter: &[u32] = if use_pr && !use_palette {
PREDICTOR_TILE_BITS_SWEEP
} else {
&[PREDICTOR_TILE_BITS]
};
for &ptb in pred_tile_bits_iter {
let opts = EncoderOptions {
use_subtract_green: use_sg,
use_color_transform: use_ct,
use_predictor: use_pr,
use_color_cache: cb > 0,
cache_bits: if cb > 0 { cb } else { COLOR_CACHE_BITS },
strip_transparent_color: false,
use_color_index: use_palette,
near_lossless: 100,
predictor_tile_bits: ptb,
};
let bytes =
encode_vp8l_argb_with(width, height, &stripped, has_alpha, opts)?;
let slot = if use_palette {
&mut best_palette
} else {
&mut best_non_palette
};
if slot.as_ref().map(|b| bytes.len() < b.len()).unwrap_or(true) {
*slot = Some(bytes);
}
}
}
}
}
}
}
const PALETTE_PREFERENCE_SLACK_BYTES: usize = 16;
let best = match (best_palette, best_non_palette) {
(Some(pal), Some(np)) => {
if pal.len() <= np.len().saturating_add(PALETTE_PREFERENCE_SLACK_BYTES) {
pal
} else {
np
}
}
(Some(pal), None) => pal,
(None, Some(np)) => np,
(None, None) => {
return Err(Error::invalid("RDO produced no candidate"));
}
};
Ok(best)
}
fn encode_image_stream(
bw: &mut BitWriter,
pixels: &[u32],
width: u32,
height: u32,
main_image: bool,
cache_bits: u32,
) -> Result<()> {
let cache_size = if cache_bits == 0 {
0u32
} else {
1u32 << cache_bits
};
let stream = if main_image {
build_cost_modelled_stream(pixels, width, height, cache_bits)
} else {
build_symbol_stream(pixels, width, height, cache_bits, None)
};
let baseline_mark = bw.mark();
encode_image_stream_single_group(bw, &stream, cache_bits, cache_size, main_image)?;
let baseline_bits = bw.bit_pos() - baseline_mark.bit_pos();
if !main_image {
return Ok(());
}
let mut best_bits = baseline_bits;
let mut best_kind = MetaHuffmanWinner::SingleGroup;
let mut best_mark_after = bw.mark();
let pixel_count = (width as u64) * (height as u64);
for &k in &[2u32, 4u32, 8u32, 16u32] {
match k {
4 if pixel_count < META_HUFFMAN_K4_MIN_PIXELS => continue,
8 if pixel_count < META_HUFFMAN_K8_MIN_PIXELS => continue,
16 if pixel_count < META_HUFFMAN_K16_MIN_PIXELS => continue,
_ => {}
}
bw.restore(baseline_mark);
let emitted = try_encode_meta_huffman(
bw, &stream, width, height, cache_bits, cache_size, k as usize,
)?;
if !emitted {
continue;
}
let bits = bw.bit_pos() - baseline_mark.bit_pos();
if bits < best_bits {
best_bits = bits;
best_kind = MetaHuffmanWinner::Groups(k as usize);
best_mark_after = bw.mark();
}
}
match best_kind {
MetaHuffmanWinner::SingleGroup => {
bw.restore(baseline_mark);
encode_image_stream_single_group(bw, &stream, cache_bits, cache_size, main_image)?;
debug_assert_eq!(bw.bit_pos() - baseline_mark.bit_pos(), baseline_bits);
}
MetaHuffmanWinner::Groups(k) => {
if bw.bit_pos() != best_mark_after.bit_pos() {
bw.restore(baseline_mark);
let emitted =
try_encode_meta_huffman(bw, &stream, width, height, cache_bits, cache_size, k)?;
debug_assert!(emitted, "winning K trial must re-emit cleanly");
debug_assert_eq!(bw.bit_pos() - baseline_mark.bit_pos(), best_bits);
}
}
}
Ok(())
}
#[derive(Clone, Copy)]
enum MetaHuffmanWinner {
SingleGroup,
Groups(usize),
}
const META_HUFFMAN_K4_MIN_PIXELS: u64 = 4096;
const META_HUFFMAN_K8_MIN_PIXELS: u64 = 16384;
const META_HUFFMAN_K16_MIN_PIXELS: u64 = 65536;
fn encode_image_stream_single_group(
bw: &mut BitWriter,
stream: &[StreamSym],
cache_bits: u32,
cache_size: u32,
main_image: bool,
) -> Result<()> {
if cache_bits > 0 {
bw.write(1, 1);
bw.write(cache_bits, 4);
} else {
bw.write(0, 1);
}
if main_image {
bw.write(0, 1);
}
let green_alpha = 256 + 24 + cache_size as usize;
let mut green_freq = vec![0u32; green_alpha];
let mut red_freq = vec![0u32; 256];
let mut blue_freq = vec![0u32; 256];
let mut alpha_freq = vec![0u32; 256];
let mut dist_freq = vec![0u32; 40];
for sym in stream {
accumulate_symbol_freq(
sym,
&mut green_freq,
&mut red_freq,
&mut blue_freq,
&mut alpha_freq,
&mut dist_freq,
);
}
let (green_lens, green_codes) = build_and_emit_huffman_tree(bw, &green_freq, MAX_CODE_LENGTH)?;
let (red_lens, red_codes) = build_and_emit_huffman_tree(bw, &red_freq, MAX_CODE_LENGTH)?;
let (blue_lens, blue_codes) = build_and_emit_huffman_tree(bw, &blue_freq, MAX_CODE_LENGTH)?;
let (alpha_lens, alpha_codes) = build_and_emit_huffman_tree(bw, &alpha_freq, MAX_CODE_LENGTH)?;
let (dist_lens, dist_codes) = build_and_emit_huffman_tree(bw, &dist_freq, MAX_CODE_LENGTH)?;
for sym in stream {
emit_symbol(
bw,
sym,
&green_codes,
&green_lens,
&red_codes,
&red_lens,
&blue_codes,
&blue_lens,
&alpha_codes,
&alpha_lens,
&dist_codes,
&dist_lens,
);
}
Ok(())
}
#[inline]
fn accumulate_symbol_freq(
sym: &StreamSym,
green: &mut [u32],
red: &mut [u32],
blue: &mut [u32],
alpha: &mut [u32],
dist: &mut [u32],
) {
match *sym {
StreamSym::Literal { a, r, g, b } => {
green[g as usize] += 1;
red[r as usize] += 1;
blue[b as usize] += 1;
alpha[a as usize] += 1;
}
StreamSym::Backref {
len_sym, dist_sym, ..
} => {
green[256 + len_sym as usize] += 1;
dist[dist_sym as usize] += 1;
}
StreamSym::CacheRef { index } => {
green[256 + 24 + index as usize] += 1;
}
}
}
#[inline]
#[allow(clippy::too_many_arguments)]
fn emit_symbol(
bw: &mut BitWriter,
sym: &StreamSym,
green_codes: &[u32],
green_lens: &[u8],
red_codes: &[u32],
red_lens: &[u8],
blue_codes: &[u32],
blue_lens: &[u8],
alpha_codes: &[u32],
alpha_lens: &[u8],
dist_codes: &[u32],
dist_lens: &[u8],
) {
match *sym {
StreamSym::Literal { a, r, g, b } => {
write_code(bw, green_codes, green_lens, g as usize);
write_code(bw, red_codes, red_lens, r as usize);
write_code(bw, blue_codes, blue_lens, b as usize);
write_code(bw, alpha_codes, alpha_lens, a as usize);
}
StreamSym::Backref {
len_sym,
len_extra_bits,
len_extra,
dist_sym,
dist_extra_bits,
dist_extra,
} => {
write_code(bw, green_codes, green_lens, 256 + len_sym as usize);
if len_extra_bits > 0 {
bw.write(len_extra, len_extra_bits);
}
write_code(bw, dist_codes, dist_lens, dist_sym as usize);
if dist_extra_bits > 0 {
bw.write(dist_extra, dist_extra_bits);
}
}
StreamSym::CacheRef { index } => {
write_code(bw, green_codes, green_lens, 256 + 24 + index as usize);
}
}
}
#[inline]
fn symbol_pixel_span(sym: &StreamSym) -> usize {
match *sym {
StreamSym::Literal { .. } | StreamSym::CacheRef { .. } => 1,
StreamSym::Backref {
len_sym,
len_extra_bits: _,
len_extra,
..
} => decode_len_or_dist_value(len_sym, len_extra) as usize,
}
}
#[inline]
fn decode_len_or_dist_value(symbol: u32, extra: u32) -> u32 {
if symbol < 4 {
symbol + 1
} else {
let extra_bits = (symbol - 2) >> 1;
let offset = (2 + (symbol & 1)) << extra_bits;
offset + extra + 1
}
}
fn try_encode_meta_huffman(
bw: &mut BitWriter,
stream: &[StreamSym],
width: u32,
height: u32,
cache_bits: u32,
cache_size: u32,
k: usize,
) -> Result<bool> {
const META_BITS: u32 = 4;
debug_assert!((2..=16).contains(&k), "K must be in 2..=16");
let tile_side = 1u32 << META_BITS;
let meta_w = (width + tile_side - 1) / tile_side;
let meta_h = (height + tile_side - 1) / tile_side;
let num_tiles = (meta_w * meta_h) as usize;
if num_tiles < k {
return Ok(false);
}
if (width as u64) * (height as u64) < 1024 {
return Ok(false);
}
let green_alpha = 256 + 24 + cache_size as usize;
let mut tile_green: Vec<Vec<u32>> = (0..num_tiles).map(|_| vec![0u32; green_alpha]).collect();
let mut tile_red: Vec<Vec<u32>> = (0..num_tiles).map(|_| vec![0u32; 256]).collect();
let mut tile_blue: Vec<Vec<u32>> = (0..num_tiles).map(|_| vec![0u32; 256]).collect();
let mut tile_alpha: Vec<Vec<u32>> = (0..num_tiles).map(|_| vec![0u32; 256]).collect();
let mut tile_dist: Vec<Vec<u32>> = (0..num_tiles).map(|_| vec![0u32; 40]).collect();
let mut sym_tile: Vec<usize> = Vec::with_capacity(stream.len());
let mut x: u32 = 0;
let mut y: u32 = 0;
for sym in stream {
let mx = x >> META_BITS;
let my = y >> META_BITS;
let tile_idx = (my * meta_w + mx) as usize;
sym_tile.push(tile_idx);
accumulate_symbol_freq(
sym,
&mut tile_green[tile_idx],
&mut tile_red[tile_idx],
&mut tile_blue[tile_idx],
&mut tile_alpha[tile_idx],
&mut tile_dist[tile_idx],
);
let span = symbol_pixel_span(sym) as u32;
let new_x = x + span;
y += new_x / width;
x = new_x % width;
}
let assignments = cluster_tiles_kmeans(&tile_green, num_tiles, k);
let used_groups: std::collections::BTreeSet<u32> = assignments.iter().copied().collect();
if used_groups.len() < k {
return Ok(false);
}
let mut groups_green: Vec<Vec<u32>> = (0..k).map(|_| vec![0u32; green_alpha]).collect();
let mut groups_red: Vec<Vec<u32>> = (0..k).map(|_| vec![0u32; 256]).collect();
let mut groups_blue: Vec<Vec<u32>> = (0..k).map(|_| vec![0u32; 256]).collect();
let mut groups_alpha: Vec<Vec<u32>> = (0..k).map(|_| vec![0u32; 256]).collect();
let mut groups_dist: Vec<Vec<u32>> = (0..k).map(|_| vec![0u32; 40]).collect();
for t in 0..num_tiles {
let g = assignments[t] as usize;
for (i, v) in tile_green[t].iter().enumerate() {
groups_green[g][i] += v;
}
for (i, v) in tile_red[t].iter().enumerate() {
groups_red[g][i] += v;
}
for (i, v) in tile_blue[t].iter().enumerate() {
groups_blue[g][i] += v;
}
for (i, v) in tile_alpha[t].iter().enumerate() {
groups_alpha[g][i] += v;
}
for (i, v) in tile_dist[t].iter().enumerate() {
groups_dist[g][i] += v;
}
}
if cache_bits > 0 {
bw.write(1, 1);
bw.write(cache_bits, 4);
} else {
bw.write(0, 1);
}
bw.write(1, 1);
bw.write(META_BITS - 2, 3);
let meta_pixels: Vec<u32> = assignments
.iter()
.map(|&g| 0xff00_0000 | ((g & 0xff) << 8))
.collect();
encode_image_stream(bw, &meta_pixels, meta_w, meta_h, false, 0)?;
let mut group_lens: Vec<[Vec<u8>; 5]> = Vec::with_capacity(k);
let mut group_codes: Vec<[Vec<u32>; 5]> = Vec::with_capacity(k);
for g in 0..k {
let (gl, gc) = build_and_emit_huffman_tree(bw, &groups_green[g], MAX_CODE_LENGTH)?;
let (rl, rc) = build_and_emit_huffman_tree(bw, &groups_red[g], MAX_CODE_LENGTH)?;
let (bl, bc) = build_and_emit_huffman_tree(bw, &groups_blue[g], MAX_CODE_LENGTH)?;
let (al, ac) = build_and_emit_huffman_tree(bw, &groups_alpha[g], MAX_CODE_LENGTH)?;
let (dl, dc) = build_and_emit_huffman_tree(bw, &groups_dist[g], MAX_CODE_LENGTH)?;
group_lens.push([gl, rl, bl, al, dl]);
group_codes.push([gc, rc, bc, ac, dc]);
}
for (idx, sym) in stream.iter().enumerate() {
let g = assignments[sym_tile[idx]] as usize;
emit_symbol(
bw,
sym,
&group_codes[g][0],
&group_lens[g][0],
&group_codes[g][1],
&group_lens[g][1],
&group_codes[g][2],
&group_lens[g][2],
&group_codes[g][3],
&group_lens[g][3],
&group_codes[g][4],
&group_lens[g][4],
);
}
Ok(true)
}
fn cluster_tiles_kmeans(tile_green: &[Vec<u32>], num_tiles: usize, k: usize) -> Vec<u32> {
let alpha = tile_green.first().map(|h| h.len()).unwrap_or(0);
if num_tiles < k || alpha == 0 || k < 2 {
return vec![0u32; num_tiles];
}
let mut seeds: Vec<usize> = Vec::with_capacity(k);
let mut max_total = 0u64;
let mut seed0 = 0usize;
for (i, h) in tile_green.iter().enumerate() {
let total: u64 = h.iter().map(|&v| v as u64).sum();
if total > max_total {
max_total = total;
seed0 = i;
}
}
seeds.push(seed0);
while seeds.len() < k {
let mut best_i = 0usize;
let mut best_d: u64 = 0;
for (i, h) in tile_green.iter().enumerate() {
if seeds.contains(&i) {
continue;
}
let min_d = seeds
.iter()
.map(|&s| l1_dist_u32(&tile_green[s], h))
.min()
.unwrap_or(0);
if min_d > best_d
|| (min_d == best_d && best_d == 0 && best_i == 0 && !seeds.contains(&best_i))
{
best_d = min_d;
best_i = i;
}
}
if seeds.contains(&best_i) {
for i in 0..num_tiles {
if !seeds.contains(&i) {
best_i = i;
break;
}
}
}
seeds.push(best_i);
}
let mut centroids: Vec<Vec<u32>> = seeds.iter().map(|&s| tile_green[s].clone()).collect();
let mut assignments = vec![0u32; num_tiles];
for _iter in 0..2 {
for (t, h) in tile_green.iter().enumerate() {
let mut best = 0u32;
let mut best_d = u64::MAX;
for (g, c) in centroids.iter().enumerate() {
let d = l1_dist_u32(c, h);
if d < best_d {
best_d = d;
best = g as u32;
}
}
assignments[t] = best;
}
let mut sums: Vec<Vec<u64>> = (0..k).map(|_| vec![0u64; alpha]).collect();
let mut counts = vec![0u64; k];
for (t, h) in tile_green.iter().enumerate() {
let g = assignments[t] as usize;
counts[g] += 1;
for (i, &v) in h.iter().enumerate() {
sums[g][i] += v as u64;
}
}
for g in 0..k {
if counts[g] == 0 {
continue;
}
for (i, s) in sums[g].iter().enumerate() {
centroids[g][i] = (s / counts[g]) as u32;
}
}
}
assignments
}
#[inline]
fn l1_dist_u32(a: &[u32], b: &[u32]) -> u64 {
debug_assert_eq!(a.len(), b.len());
let mut s = 0u64;
for (av, bv) in a.iter().zip(b.iter()) {
s += (*av).abs_diff(*bv) as u64;
}
s
}
#[derive(Clone, Copy)]
enum StreamSym {
Literal {
a: u8,
r: u8,
g: u8,
b: u8,
},
Backref {
len_sym: u32,
len_extra_bits: u32,
len_extra: u32,
dist_sym: u32,
dist_extra_bits: u32,
dist_extra: u32,
},
CacheRef {
index: u32,
},
}
fn encode_len_or_dist_value(value: u32) -> (u32, u32, u32) {
debug_assert!(value >= 1);
if value <= 4 {
return (value - 1, 0, 0);
}
let v = value - 1; let msb = 31 - v.leading_zeros(); let extra_bits = msb - 1;
let sym_sub = (v >> extra_bits) & 1; let symbol = 2 * extra_bits + 2 + sym_sub;
let offset = (2 + sym_sub) << extra_bits;
let extra = v - offset;
(symbol, extra_bits, extra)
}
struct CostModel {
green: Vec<u32>,
red: Vec<u32>,
blue: Vec<u32>,
alpha: Vec<u32>,
dist: Vec<u32>,
}
const COST_SHIFT: u32 = 4;
const COST_UNSEEN: u32 = 40 << COST_SHIFT;
impl CostModel {
fn from_freqs(green: &[u32], red: &[u32], blue: &[u32], alpha: &[u32], dist: &[u32]) -> Self {
Self {
green: per_alphabet_cost(green),
red: per_alphabet_cost(red),
blue: per_alphabet_cost(blue),
alpha: per_alphabet_cost(alpha),
dist: per_alphabet_cost(dist),
}
}
fn literal_cost(&self, a: u8, r: u8, g: u8, b: u8) -> u32 {
self.green
.get(g as usize)
.copied()
.unwrap_or(COST_UNSEEN)
.saturating_add(self.red.get(r as usize).copied().unwrap_or(COST_UNSEEN))
.saturating_add(self.blue.get(b as usize).copied().unwrap_or(COST_UNSEEN))
.saturating_add(self.alpha.get(a as usize).copied().unwrap_or(COST_UNSEEN))
}
fn cache_cost(&self, idx: u32) -> u32 {
let s = 256 + 24 + idx as usize;
self.green.get(s).copied().unwrap_or(COST_UNSEEN)
}
fn backref_cost(&self, length: u32, distance: u32) -> u32 {
let (len_sym, len_eb, _) = encode_len_or_dist_value(length);
let (dist_sym, dist_eb, _) = encode_len_or_dist_value(distance + 120);
let len_pos = 256 + len_sym as usize;
let len_cost = self.green.get(len_pos).copied().unwrap_or(COST_UNSEEN);
let dist_cost = self
.dist
.get(dist_sym as usize)
.copied()
.unwrap_or(COST_UNSEEN);
len_cost
.saturating_add(len_eb << COST_SHIFT)
.saturating_add(dist_cost)
.saturating_add(dist_eb << COST_SHIFT)
}
}
fn per_alphabet_cost(freq: &[u32]) -> Vec<u32> {
let total: u64 = freq.iter().map(|&v| v as u64).sum::<u64>().max(1);
freq.iter()
.map(|&f| {
if f == 0 {
COST_UNSEEN
} else {
let log_total = log2_fixed(total as u32);
let log_f = log2_fixed(f);
let raw = (log_total as i64 - log_f as i64).max(1 << COST_SHIFT) as u32;
raw.min(COST_UNSEEN)
}
})
.collect()
}
fn log2_fixed(x: u32) -> u32 {
if x <= 1 {
return 0;
}
let int_part = 31 - x.leading_zeros();
let base = 1u32 << int_part;
let frac = ((x - base) << COST_SHIFT) / base; (int_part << COST_SHIFT) + frac
}
fn build_symbol_stream(
pixels: &[u32],
_width: u32,
_height: u32,
cache_bits: u32,
cost_model: Option<&CostModel>,
) -> Vec<StreamSym> {
let mut out: Vec<StreamSym> = Vec::with_capacity(pixels.len());
let n = pixels.len();
const HASH_BITS: u32 = 12;
const HASH_SIZE: usize = 1 << HASH_BITS;
let mut head: Vec<usize> = vec![usize::MAX; HASH_SIZE];
let mut next: Vec<usize> = vec![usize::MAX; n];
let hash3 = |p0: u32, p1: u32, p2: u32| -> usize {
let k = p0
.wrapping_mul(0x9E3779B9)
.wrapping_add(p1.wrapping_mul(0x85EBCA77))
.wrapping_add(p2.wrapping_mul(0xC2B2AE3D));
(k >> (32 - HASH_BITS)) as usize
};
let cache_size = if cache_bits == 0 {
0usize
} else {
1usize << cache_bits
};
let mut cache: Vec<u32> = vec![0u32; cache_size];
let find_match =
|start: usize, head: &[usize], next: &[usize], cm: Option<&CostModel>| -> (usize, usize) {
if start + MIN_MATCH > n {
return (0, 0);
}
let h = hash3(pixels[start], pixels[start + 1], pixels[start + 2]);
let mut candidate = head[h];
let mut tries = LZ_MAX_TRIES;
let mut best_len = 0usize;
let mut best_dist = 0usize;
let mut best_ratio_len = 0usize;
let mut best_ratio_dist = 0usize;
let mut best_ratio: u64 = u64::MAX;
while candidate != usize::MAX && tries > 0 {
if candidate >= start {
candidate = next[candidate];
tries -= 1;
continue;
}
let dist = start - candidate;
if dist == 0 || dist > LZ_WINDOW {
break;
}
let max_len = (n - start).min(MAX_MATCH);
let mut l = 0usize;
while l < max_len && pixels[candidate + l] == pixels[start + l] {
l += 1;
}
if l >= MIN_MATCH {
if l > best_len {
best_len = l;
best_dist = dist;
}
if let Some(cm) = cm {
let bits = cm.backref_cost(l as u32, dist as u32);
let ratio = (bits as u64) * 16 / l as u64;
if ratio < best_ratio {
best_ratio = ratio;
best_ratio_len = l;
best_ratio_dist = dist;
}
}
if l >= 256 {
break;
}
}
candidate = next[candidate];
tries -= 1;
}
if cm.is_some() && best_ratio_len >= MIN_MATCH {
(best_ratio_len, best_ratio_dist)
} else {
(best_len, best_dist)
}
};
let mut i = 0usize;
while i < n {
let (best_len, best_dist) = find_match(i, &head, &next, cost_model);
let mut emit_match_len = best_len;
let mut emit_match_dist = best_dist;
let mut emit_literal = best_len < MIN_MATCH;
if let Some(cm) = cost_model {
if best_len >= MIN_MATCH && i + 1 < n {
let match_cost = cm.backref_cost(best_len as u32, best_dist as u32);
let p_i = pixels[i];
let lit_i_cost = if cache_size > 0 {
let idx = (0x1e35_a7bd_u32.wrapping_mul(p_i) >> (32 - cache_bits)) as usize;
if idx < cache.len() && cache[idx] == p_i {
cm.cache_cost(idx as u32)
} else {
cm.literal_cost(
((p_i >> 24) & 0xff) as u8,
((p_i >> 16) & 0xff) as u8,
((p_i >> 8) & 0xff) as u8,
(p_i & 0xff) as u8,
)
}
} else {
cm.literal_cost(
((p_i >> 24) & 0xff) as u8,
((p_i >> 16) & 0xff) as u8,
((p_i >> 8) & 0xff) as u8,
(p_i & 0xff) as u8,
)
};
let (look_len, look_dist) = find_match(i + 1, &head, &next, cost_model);
if look_len + 1 >= best_len && look_len >= MIN_MATCH {
let lookahead_cost = lit_i_cost
.saturating_add(cm.backref_cost(look_len as u32, look_dist as u32));
if lookahead_cost < match_cost {
emit_match_len = 0;
emit_match_dist = 0;
emit_literal = true;
}
}
}
}
if !emit_literal && emit_match_len >= MIN_MATCH {
let (len_sym, len_eb, len_ex) = encode_len_or_dist_value(emit_match_len as u32);
let (dist_sym, dist_eb, dist_ex) =
encode_len_or_dist_value((emit_match_dist as u32) + 120);
out.push(StreamSym::Backref {
len_sym,
len_extra_bits: len_eb,
len_extra: len_ex,
dist_sym,
dist_extra_bits: dist_eb,
dist_extra: dist_ex,
});
for k in 0..emit_match_len {
let pos = i + k;
if pos + 2 < n {
let h = hash3(pixels[pos], pixels[pos + 1], pixels[pos + 2]);
next[pos] = head[h];
head[h] = pos;
}
if cache_size > 0 {
cache_add(&mut cache, cache_bits, pixels[pos]);
}
}
i += emit_match_len;
} else {
let p = pixels[i];
let mut emitted_cache = false;
if cache_size > 0 {
let idx = (0x1e35_a7bd_u32.wrapping_mul(p) >> (32 - cache_bits)) as usize;
if idx < cache.len() && cache[idx] == p {
out.push(StreamSym::CacheRef { index: idx as u32 });
emitted_cache = true;
}
}
if !emitted_cache {
out.push(StreamSym::Literal {
a: ((p >> 24) & 0xff) as u8,
r: ((p >> 16) & 0xff) as u8,
g: ((p >> 8) & 0xff) as u8,
b: (p & 0xff) as u8,
});
}
if cache_size > 0 {
cache_add(&mut cache, cache_bits, p);
}
if i + 2 < n {
let h = hash3(pixels[i], pixels[i + 1], pixels[i + 2]);
next[i] = head[h];
head[h] = i;
}
i += 1;
}
}
out
}
fn estimate_stream_cost(stream: &[StreamSym], cm: &CostModel) -> u64 {
let mut total: u64 = 0;
for sym in stream {
match *sym {
StreamSym::Literal { a, r, g, b } => {
total += cm.literal_cost(a, r, g, b) as u64;
}
StreamSym::Backref {
len_sym,
len_extra_bits,
dist_sym,
dist_extra_bits,
..
} => {
let len_pos = 256 + len_sym as usize;
let len_cost = cm.green.get(len_pos).copied().unwrap_or(COST_UNSEEN);
let dist_cost = cm
.dist
.get(dist_sym as usize)
.copied()
.unwrap_or(COST_UNSEEN);
total += len_cost as u64
+ (len_extra_bits as u64) * (1u64 << COST_SHIFT)
+ dist_cost as u64
+ (dist_extra_bits as u64) * (1u64 << COST_SHIFT);
}
StreamSym::CacheRef { index } => {
total += cm.cache_cost(index) as u64;
}
}
}
total
}
fn build_cost_modelled_stream(
pixels: &[u32],
width: u32,
height: u32,
cache_bits: u32,
) -> Vec<StreamSym> {
let pass1 = build_symbol_stream(pixels, width, height, cache_bits, None);
if (pixels.len() as u64) < 4096 {
return pass1;
}
let cache_size = if cache_bits == 0 {
0u32
} else {
1u32 << cache_bits
};
let green_alpha = 256 + 24 + cache_size as usize;
let mut green_freq = vec![0u32; green_alpha];
let mut red_freq = vec![0u32; 256];
let mut blue_freq = vec![0u32; 256];
let mut alpha_freq = vec![0u32; 256];
let mut dist_freq = vec![0u32; 40];
for sym in &pass1 {
accumulate_symbol_freq(
sym,
&mut green_freq,
&mut red_freq,
&mut blue_freq,
&mut alpha_freq,
&mut dist_freq,
);
}
let cm = CostModel::from_freqs(&green_freq, &red_freq, &blue_freq, &alpha_freq, &dist_freq);
let pass2 = build_symbol_stream(pixels, width, height, cache_bits, Some(&cm));
let cost1 = estimate_stream_cost(&pass1, &cm);
let cost2 = estimate_stream_cost(&pass2, &cm);
if std::env::var_os("VP8L_TRACE").is_some() {
eprintln!(
"[vp8l] pass1={} sym, pass2={} sym, cost1={}, cost2={}, win={}",
pass1.len(),
pass2.len(),
cost1,
cost2,
if cost2 < cost1 { "pass2" } else { "pass1" }
);
}
if cost2 < cost1 {
pass2
} else {
pass1
}
}
fn cache_add(cache: &mut [u32], cache_bits: u32, argb: u32) {
if cache.is_empty() {
return;
}
let idx = (0x1e35_a7bd_u32.wrapping_mul(argb) >> (32 - cache_bits)) as usize;
if idx < cache.len() {
cache[idx] = argb;
}
}
const MAX_PALETTE_SIZE: usize = 256;
fn build_palette(pixels: &[u32]) -> Option<Vec<u32>> {
let mut palette: Vec<u32> = Vec::with_capacity(MAX_PALETTE_SIZE + 1);
for &p in pixels {
match palette.binary_search(&p) {
Ok(_) => {}
Err(pos) => {
if palette.len() >= MAX_PALETTE_SIZE {
return None;
}
palette.insert(pos, p);
}
}
}
if palette.len() < 2 {
return None;
}
Some(palette)
}
fn bits_per_pixel_for(num_colors: u32) -> u32 {
if num_colors <= 2 {
1
} else if num_colors <= 4 {
2
} else if num_colors <= 16 {
4
} else {
8
}
}
fn delta_encode_palette(palette: &[u32]) -> Vec<u32> {
let mut out: Vec<u32> = Vec::with_capacity(palette.len());
out.push(palette[0]);
for i in 1..palette.len() {
out.push(sub_argb(palette[i], palette[i - 1]));
}
out
}
fn pack_palette_indices(
pixels: &[u32],
width: u32,
height: u32,
palette: &[u32],
bits_per_pixel: u32,
packed_w: u32,
) -> Vec<u32> {
let w = width as usize;
let h = height as usize;
let pack = (8 / bits_per_pixel) as usize;
let pw = packed_w as usize;
let mut out: Vec<u32> = vec![0u32; pw * h];
for y in 0..h {
for xp in 0..pw {
let mut g_byte: u32 = 0;
for sub in 0..pack {
let x_orig = xp * pack + sub;
if x_orig >= w {
break;
}
let pixel = pixels[y * w + x_orig];
let idx = palette
.binary_search(&pixel)
.expect("palette must contain every input pixel by construction");
g_byte |= (idx as u32) << (sub as u32 * bits_per_pixel);
}
out[y * pw + xp] = 0xff00_0000 | (g_byte << 8);
}
}
out
}
fn apply_subtract_green_forward(pixels: &mut [u32]) {
for p in pixels.iter_mut() {
let a = (*p >> 24) & 0xff;
let r = (*p >> 16) & 0xff;
let g = (*p >> 8) & 0xff;
let b = *p & 0xff;
let nr = r.wrapping_sub(g) & 0xff;
let nb = b.wrapping_sub(g) & 0xff;
*p = (a << 24) | (nr << 16) | (g << 8) | nb;
}
}
fn predict_argb(out: &[u32], w: usize, x: usize, y: usize, mode: u32) -> u32 {
let l = out[y * w + x - 1];
let t = out[(y - 1) * w + x];
let tl = out[(y - 1) * w + x - 1];
let tr = if x + 1 < w {
out[(y - 1) * w + x + 1]
} else {
out[y * w]
};
match mode {
0 => 0xff00_0000,
1 => l,
2 => t,
3 => tr,
4 => tl,
5 => avg2(avg2(l, tr), t),
6 => avg2(l, tl),
7 => avg2(l, t),
8 => avg2(tl, t),
9 => avg2(t, tr),
10 => avg2(avg2(l, tl), avg2(t, tr)),
11 => select_argb(l, t, tl),
12 => clamp_add_sub_argb(l, t, tl),
13 => clamp_add_sub_half_argb(avg2(l, t), tl),
_ => 0xff00_0000,
}
}
fn avg2(a: u32, b: u32) -> u32 {
let mut out = 0u32;
for c in 0..4 {
let sh = c * 8;
let av = (a >> sh) & 0xff;
let bv = (b >> sh) & 0xff;
out |= ((av + bv) >> 1) << sh;
}
out
}
fn select_argb(l: u32, t: u32, tl: u32) -> u32 {
let mut out = 0u32;
let mut dl = 0i32;
let mut dt = 0i32;
for c in 0..4 {
let sh = c * 8;
let lv = ((l >> sh) & 0xff) as i32;
let tv = ((t >> sh) & 0xff) as i32;
let tlv = ((tl >> sh) & 0xff) as i32;
dl += (tv - tlv).abs();
dt += (lv - tlv).abs();
}
for c in 0..4 {
let sh = c * 8;
let lv = (l >> sh) & 0xff;
let tv = (t >> sh) & 0xff;
let v = if dl < dt { lv } else { tv };
out |= v << sh;
}
out
}
fn clamp_add_sub_argb(l: u32, t: u32, tl: u32) -> u32 {
let mut out = 0u32;
for c in 0..4 {
let sh = c * 8;
let lv = ((l >> sh) & 0xff) as i32;
let tv = ((t >> sh) & 0xff) as i32;
let tlv = ((tl >> sh) & 0xff) as i32;
let v = (lv + tv - tlv).clamp(0, 255) as u32;
out |= v << sh;
}
out
}
fn clamp_add_sub_half_argb(a: u32, b: u32) -> u32 {
let mut out = 0u32;
for c in 0..4 {
let sh = c * 8;
let av = ((a >> sh) & 0xff) as i32;
let bv = ((b >> sh) & 0xff) as i32;
let v = (av + (av - bv) / 2).clamp(0, 255) as u32;
out |= v << sh;
}
out
}
fn sub_argb(a: u32, b: u32) -> u32 {
let aa = (a >> 24) & 0xff;
let ar = (a >> 16) & 0xff;
let ag = (a >> 8) & 0xff;
let ab = a & 0xff;
let ba = (b >> 24) & 0xff;
let br = (b >> 16) & 0xff;
let bg = (b >> 8) & 0xff;
let bb = b & 0xff;
(((aa.wrapping_sub(ba)) & 0xff) << 24)
| (((ar.wrapping_sub(br)) & 0xff) << 16)
| (((ag.wrapping_sub(bg)) & 0xff) << 8)
| ((ab.wrapping_sub(bb)) & 0xff)
}
fn score_predictor_on_tile(
originals: &[u32],
decoded: &[u32],
width: usize,
tile_x0: usize,
tile_y0: usize,
tile_x1: usize,
tile_y1: usize,
mode: u32,
) -> u64 {
let mut cost = 0u64;
for y in tile_y0..tile_y1 {
for x in tile_x0..tile_x1 {
let idx = y * width + x;
let pred = if x == 0 && y == 0 {
0xff00_0000
} else if y == 0 {
decoded[idx - 1]
} else if x == 0 {
decoded[idx - width]
} else {
predict_argb(decoded, width, x, y, mode)
};
let p = originals[idx];
for c in 0..4 {
let sh = c * 8;
let pv = ((p >> sh) & 0xff) as i32;
let prv = ((pred >> sh) & 0xff) as i32;
let d = (pv - prv).unsigned_abs() as u64;
cost += d.min(256 - d);
}
}
}
cost
}
fn choose_predictor_modes(
pixels: &[u32],
width: u32,
height: u32,
tile_bits: u32,
sub_w: u32,
sub_h: u32,
) -> Vec<u32> {
let tile_side = 1usize << tile_bits;
let w = width as usize;
let h = height as usize;
let mut modes = Vec::with_capacity((sub_w * sub_h) as usize);
for ty in 0..sub_h as usize {
for tx in 0..sub_w as usize {
let x0 = tx * tile_side;
let y0 = ty * tile_side;
let x1 = (x0 + tile_side).min(w);
let y1 = (y0 + tile_side).min(h);
let mut best = PREDICTOR_MODES[0];
let mut best_cost = u64::MAX;
for &m in PREDICTOR_MODES {
let c = score_predictor_on_tile(pixels, pixels, w, x0, y0, x1, y1, m);
if c < best_cost {
best_cost = c;
best = m;
}
}
modes.push(best);
}
}
modes
}
#[derive(Clone, Copy, Default)]
struct ColorCoeffs {
g2r: i8,
g2b: i8,
r2b: i8,
}
fn residual_cost(r: i32, g: i32, b: i32, c: ColorCoeffs) -> u64 {
let nr = r - (((c.g2r as i32) * sign_extend_i8(g)) >> 5);
let nb1 = b - (((c.g2b as i32) * sign_extend_i8(g)) >> 5);
let nb2 = nb1 - (((c.r2b as i32) * sign_extend_i8(r)) >> 5);
let rr = nr.rem_euclid(256) as u64;
let bb = nb2.rem_euclid(256) as u64;
rr.min(256 - rr) + bb.min(256 - bb)
}
fn sign_extend_i8(v: i32) -> i32 {
((v & 0xff) as i8) as i32
}
fn choose_color_transform(
pixels: &[u32],
width: u32,
height: u32,
tile_bits: u32,
sub_w: u32,
sub_h: u32,
) -> Vec<ColorCoeffs> {
let tile_side = 1usize << tile_bits;
let w = width as usize;
let h = height as usize;
let mut out = Vec::with_capacity((sub_w * sub_h) as usize);
for ty in 0..sub_h as usize {
for tx in 0..sub_w as usize {
let x0 = tx * tile_side;
let y0 = ty * tile_side;
let x1 = (x0 + tile_side).min(w);
let y1 = (y0 + tile_side).min(h);
let mut tile: Vec<(i32, i32, i32)> = Vec::with_capacity((x1 - x0) * (y1 - y0));
for y in y0..y1 {
for x in x0..x1 {
let p = pixels[y * w + x];
let r = ((p >> 16) & 0xff) as i32;
let g = ((p >> 8) & 0xff) as i32;
let b = (p & 0xff) as i32;
tile.push((r, g, b));
}
}
let baseline_score = tile.iter().fold(0u64, |acc, &(r, _g, b)| {
let rr = r.rem_euclid(256) as u64;
let bb = b.rem_euclid(256) as u64;
acc + rr.min(256 - rr) + bb.min(256 - bb)
});
let mut best = ColorCoeffs::default();
let mut best_cost = baseline_score;
for &g2r in COLOR_COEFF_GRID.iter() {
for &g2b in COLOR_COEFF_GRID.iter() {
let c = ColorCoeffs { g2r, g2b, r2b: 0 };
let mut total = 0u64;
for &(r, g, b) in &tile {
total += residual_cost(r, g, b, c);
if total >= best_cost {
break;
}
}
if total < best_cost {
best_cost = total;
best = c;
}
}
}
for &r2b in COLOR_R2B_GRID.iter() {
let c = ColorCoeffs {
g2r: best.g2r,
g2b: best.g2b,
r2b,
};
let mut total = 0u64;
for &(r, g, b) in &tile {
total += residual_cost(r, g, b, c);
if total >= best_cost {
break;
}
}
if total < best_cost {
best_cost = total;
best = c;
}
}
out.push(best);
}
}
out
}
fn apply_color_transform_forward(
pixels: &[u32],
width: u32,
height: u32,
tile_bits: u32,
coeffs: &[ColorCoeffs],
sub_w: u32,
) -> Vec<u32> {
let w = width as usize;
let h = height as usize;
let mut out = vec![0u32; w * h];
for y in 0..h {
for x in 0..w {
let idx = y * w + x;
let p = pixels[idx];
let tx = x >> tile_bits;
let ty = y >> tile_bits;
let c = coeffs[ty * sub_w as usize + tx];
let a = (p >> 24) & 0xff;
let r_orig = ((p >> 16) & 0xff) as i32;
let g = ((p >> 8) & 0xff) as i32;
let b_orig = (p & 0xff) as i32;
let new_r = (r_orig - (((c.g2r as i32) * sign_extend_i8(g)) >> 5)).rem_euclid(256);
let b_after_g = (b_orig - (((c.g2b as i32) * sign_extend_i8(g)) >> 5)).rem_euclid(256);
let new_b =
(b_after_g - (((c.r2b as i32) * sign_extend_i8(r_orig)) >> 5)).rem_euclid(256);
out[idx] = (a << 24) | ((new_r as u32) << 16) | ((g as u32) << 8) | (new_b as u32);
}
}
out
}
fn apply_predictor_forward(
pixels: &[u32],
width: u32,
height: u32,
tile_bits: u32,
modes: &[u32],
sub_w: u32,
) -> Vec<u32> {
let w = width as usize;
let h = height as usize;
let mut out = vec![0u32; w * h];
for y in 0..h {
for x in 0..w {
let idx = y * w + x;
let pred = if x == 0 && y == 0 {
0xff00_0000
} else if y == 0 {
pixels[idx - 1]
} else if x == 0 {
pixels[idx - w]
} else {
let tx = x >> tile_bits;
let ty = y >> tile_bits;
let mode = modes[ty * sub_w as usize + tx];
predict_argb(pixels, w, x, y, mode)
};
out[idx] = sub_argb(pixels[idx], pred);
}
}
out
}
fn build_limited_lengths(freqs: &[u32], max_len: u8) -> Result<Vec<u8>> {
let n = freqs.len();
let mut lens = vec![0u8; n];
let nonzero: Vec<usize> = (0..n).filter(|&i| freqs[i] > 0).collect();
if nonzero.is_empty() {
if n >= 2 {
lens[0] = 1;
lens[1] = 1;
} else {
lens[0] = 1;
}
return Ok(lens);
}
if nonzero.len() == 1 {
let s = nonzero[0];
lens[s] = 1;
let d = if s == 0 { 1.min(n - 1) } else { 0 };
lens[d] = 1;
return Ok(lens);
}
#[derive(Clone)]
struct Node {
freq: u64,
left: i32,
right: i32,
symbol: i32,
}
let mut nodes: Vec<Node> = Vec::with_capacity(n * 2);
for (i, &f) in freqs.iter().enumerate() {
nodes.push(Node {
freq: f as u64,
left: -1,
right: -1,
symbol: i as i32,
});
}
let mut heap: std::collections::BinaryHeap<std::cmp::Reverse<(u64, usize)>> =
std::collections::BinaryHeap::new();
for &i in &nonzero {
heap.push(std::cmp::Reverse((nodes[i].freq, i)));
}
while heap.len() > 1 {
let std::cmp::Reverse((fa, a)) = heap.pop().unwrap();
let std::cmp::Reverse((fb, b)) = heap.pop().unwrap();
let idx = nodes.len();
nodes.push(Node {
freq: fa + fb,
left: a as i32,
right: b as i32,
symbol: -1,
});
heap.push(std::cmp::Reverse((fa + fb, idx)));
}
let root = heap.pop().unwrap().0 .1;
fn walk(nodes: &[Node], idx: usize, depth: u8, lens: &mut [u8]) {
let n = &nodes[idx];
if n.symbol >= 0 {
lens[n.symbol as usize] = depth.max(1);
} else {
walk(nodes, n.left as usize, depth + 1, lens);
walk(nodes, n.right as usize, depth + 1, lens);
}
}
walk(&nodes, root, 0, &mut lens);
limit_code_lengths(&mut lens, max_len);
Ok(lens)
}
fn limit_code_lengths(lens: &mut [u8], max_len: u8) {
let max_observed = *lens.iter().max().unwrap_or(&0);
if max_observed <= max_len {
return;
}
let mut bl_count: Vec<u32> = vec![0; (max_observed as usize + 1).max(1)];
for &l in lens.iter() {
if l > 0 {
bl_count[l as usize] += 1;
}
}
for l in (max_len as usize + 1)..bl_count.len() {
bl_count[max_len as usize] += bl_count[l];
bl_count[l] = 0;
}
let kraft = |bl: &[u32]| -> i64 {
let mut s: i64 = 0;
for (d, &c) in bl.iter().enumerate() {
if d == 0 || d > max_len as usize {
continue;
}
s += (c as i64) << (max_len as usize - d);
}
s
};
let target: i64 = 1i64 << (max_len as u32);
while kraft(&bl_count) > target {
let mut d = max_len as i32 - 1;
while d > 0 && bl_count[d as usize] == 0 {
d -= 1;
}
if d <= 0 {
break;
}
bl_count[d as usize] -= 1;
bl_count[(d + 1) as usize] += 1;
}
loop {
let k = kraft(&bl_count);
if k >= target {
break;
}
let deficit = target - k;
let mut chosen: Option<i32> = None;
let mut d = max_len as i32;
while d > 1 {
if bl_count[d as usize] > 0 {
let add = 1i64 << ((max_len as i32 - d).max(0) as u32);
if add <= deficit {
chosen = Some(d);
break;
}
}
d -= 1;
}
let Some(d) = chosen else {
break;
};
bl_count[d as usize] -= 1;
bl_count[(d - 1) as usize] += 1;
}
let mut by_depth: Vec<(u8, usize)> = lens
.iter()
.enumerate()
.filter(|(_, &l)| l > 0)
.map(|(i, &l)| (l, i))
.collect();
by_depth.sort_by(|a, b| b.0.cmp(&a.0).then(a.1.cmp(&b.1)));
for l in lens.iter_mut() {
*l = 0;
}
let mut idx = 0usize;
for l in (1..=max_len as usize).rev() {
let cnt = bl_count[l] as usize;
for _ in 0..cnt {
if idx >= by_depth.len() {
break;
}
let (_, sym) = by_depth[idx];
lens[sym] = l as u8;
idx += 1;
}
}
while idx < by_depth.len() {
let (_, sym) = by_depth[idx];
lens[sym] = max_len;
idx += 1;
}
}
fn canonical_codes(lens: &[u8]) -> Vec<u32> {
let max_len = *lens.iter().max().unwrap_or(&0);
let mut codes = vec![0u32; lens.len()];
if max_len == 0 {
return codes;
}
let mut bl_count = vec![0u32; max_len as usize + 1];
for &l in lens {
if l > 0 {
bl_count[l as usize] += 1;
}
}
let mut next_code = vec![0u32; max_len as usize + 1];
let mut code = 0u32;
for bits in 1..=max_len as usize {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = code;
}
for (sym, &l) in lens.iter().enumerate() {
if l > 0 {
codes[sym] = next_code[l as usize];
next_code[l as usize] += 1;
}
}
codes
}
fn write_code(bw: &mut BitWriter, codes: &[u32], lens: &[u8], sym: usize) {
let l = lens[sym];
let code = codes[sym];
let mut rev = 0u32;
for i in 0..l {
if (code >> i) & 1 != 0 {
rev |= 1 << (l - 1 - i);
}
}
bw.write(rev, l as u32);
}
fn emit_huffman_tree(bw: &mut BitWriter, lens: &[u8]) -> Result<()> {
bw.write(0, 1);
const CODE_LENGTH_ORDER: [usize; 19] = [
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
];
let meta_stream = compress_lengths(lens);
let mut meta_freq = vec![0u32; 19];
for (code, _extra) in &meta_stream {
meta_freq[*code as usize] += 1;
}
let meta_lens = build_limited_lengths(&meta_freq, 7)?;
let meta_codes = canonical_codes(&meta_lens);
let mut last_used = 0usize;
for i in 0..19 {
let sym = CODE_LENGTH_ORDER[i];
if meta_lens[sym] != 0 {
last_used = i + 1;
}
}
let num_code_lengths = last_used.max(4);
bw.write((num_code_lengths - 4) as u32, 4);
for i in 0..num_code_lengths {
let sym = CODE_LENGTH_ORDER[i];
bw.write(meta_lens[sym] as u32, 3);
}
bw.write(0, 1);
for (code, extra) in &meta_stream {
write_code(bw, &meta_codes, &meta_lens, *code as usize);
match *code {
16 => bw.write(*extra, 2),
17 => bw.write(*extra, 3),
18 => bw.write(*extra, 7),
_ => {}
}
}
Ok(())
}
fn try_emit_simple_huffman(bw: &mut BitWriter, freqs: &[u32]) -> Option<(Vec<u8>, Vec<u32>)> {
let alphabet = freqs.len();
let nonzero: Vec<usize> = (0..alphabet).filter(|&i| freqs[i] > 0).collect();
let lens = vec![0u8; alphabet];
let codes = vec![0u32; alphabet];
match nonzero.len() {
0 => {
bw.write(1, 1); bw.write(0, 1); bw.write(0, 1); bw.write(0, 1); Some((lens, codes))
}
1 => {
let s = nonzero[0];
if s >= 256 {
return None;
}
bw.write(1, 1); bw.write(0, 1); if s <= 1 {
bw.write(0, 1); bw.write(s as u32, 1);
} else {
bw.write(1, 1); bw.write(s as u32, 8);
}
Some((lens, codes))
}
2 => {
let mut a = nonzero[0];
let mut b = nonzero[1];
if a >= 256 || b >= 256 {
return None;
}
if a > b {
std::mem::swap(&mut a, &mut b);
}
bw.write(1, 1); bw.write(1, 1); if a <= 1 {
bw.write(0, 1); bw.write(a as u32, 1);
} else {
bw.write(1, 1); bw.write(a as u32, 8);
}
bw.write(b as u32, 8); let mut lens = lens;
let mut codes = codes;
lens[a] = 1;
lens[b] = 1;
codes[a] = 0;
codes[b] = 1;
Some((lens, codes))
}
_ => None,
}
}
fn build_and_emit_huffman_tree(
bw: &mut BitWriter,
freqs: &[u32],
max_len: u8,
) -> Result<(Vec<u8>, Vec<u32>)> {
if let Some((lens, codes)) = try_emit_simple_huffman(bw, freqs) {
return Ok((lens, codes));
}
let lens = build_limited_lengths(freqs, max_len)?;
let codes = canonical_codes(&lens);
emit_huffman_tree(bw, &lens)?;
Ok((lens, codes))
}
fn compress_lengths(lens: &[u8]) -> Vec<(u8, u32)> {
let mut out: Vec<(u8, u32)> = Vec::new();
let mut i = 0usize;
while i < lens.len() {
let v = lens[i];
if v == 0 {
let mut j = i;
while j < lens.len() && lens[j] == 0 {
j += 1;
}
let mut run = j - i;
while run >= 11 {
let take = run.min(138);
out.push((18, (take - 11) as u32));
run -= take;
}
while run >= 3 {
let take = run.min(10);
out.push((17, (take - 3) as u32));
run -= take;
}
for _ in 0..run {
out.push((0, 0));
}
i = j;
} else {
out.push((v, 0));
i += 1;
}
}
out
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn encode_len_dist_roundtrip_small() {
for value in 1u32..=200 {
let (sym, eb, extra) = encode_len_or_dist_value(value);
let decoded = if sym < 4 {
sym + 1
} else {
let eb_d = (sym - 2) >> 1;
let off = (2 + (sym & 1)) << eb_d;
off + extra + 1
};
assert_eq!(decoded, value, "round-trip failed for value {value}");
assert!(eb <= 14, "extra_bits too big for value {value}");
let _ = extra;
let _ = eb;
}
}
#[test]
fn canonical_codes_match_decoder_shape() {
let lens = [2u8, 1u8, 3u8, 3u8];
let codes = canonical_codes(&lens);
assert_eq!(codes[1], 0b0);
assert_eq!(codes[0], 0b10);
assert_eq!(codes[2], 0b110);
assert_eq!(codes[3], 0b111);
}
#[test]
fn predictor_mode5_matches_spec_nesting() {
let mut buf = vec![0u32; 6];
buf[2] = 0xff00_0800; buf[3] = 0xff00_0400; buf[4] = 0xff00_0200; let pred = predict_argb(&buf, 3, 2, 1, 5);
let pred_g = (pred >> 8) & 0xff;
assert_eq!(
pred_g, 5,
"mode 5 must compute Average2(Average2(L, TR), T) per spec"
);
}
}