#![allow(clippy::too_many_arguments)]
use alloc::vec;
use alloc::vec::Vec;
use archmage::prelude::*;
#[cfg(target_arch = "x86")]
use archmage::intrinsics::x86 as simd_mem;
#[cfg(target_arch = "x86_64")]
use archmage::intrinsics::x86_64 as simd_mem;
use super::types::{argb_alpha, argb_blue, argb_green, argb_red, make_argb, subsample_size};
/// Identifies one of the four pixel transforms implemented in this module.
///
/// The explicit discriminants pin the `u8` value of every variant.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransformType {
    /// Spatial predictor transform.
    Predictor = 0,
    /// Cross-color (color decorrelation) transform.
    CrossColor = 1,
    /// Subtract-green transform.
    SubtractGreen = 2,
    /// Palette / color-indexing transform.
    ColorIndexing = 3,
}
/// Spatial predictor selector.
///
/// The discriminant is the mode number that `apply_predictor_transform`
/// stores in the green channel of the predictor image.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PredictorMode {
    /// Constant opaque black (`0xff000000`).
    Black = 0,
    /// Left neighbour.
    Left = 1,
    /// Top neighbour.
    Top = 2,
    /// Top-right neighbour.
    TopRight = 3,
    /// Top-left neighbour.
    TopLeft = 4,
    /// Average of (average of left and top-right) and top.
    AvgAvgLtrT = 5,
    /// Average of left and top-left.
    AvgLTl = 6,
    /// Average of left and top.
    AvgLT = 7,
    /// Average of top-left and top.
    AvgTlT = 8,
    /// Average of top and top-right.
    AvgTTr = 9,
    /// Average of (left, top-left) average and (top, top-right) average.
    AvgAvgLTlAvgTTr = 10,
    /// Picks left or top depending on their distance to top-left.
    Select = 11,
    /// Per-channel clamp of `left + top - top_left`.
    ClampAddSubtractFull = 12,
    /// Per-channel clamp of `avg + (avg - top_left) / 2`, `avg` being the left/top mean.
    ClampAddSubtractHalf = 13,
}

impl PredictorMode {
    /// Returns every mode, ordered by discriminant.
    pub const fn all() -> [PredictorMode; 14] {
        [
            Self::Black,
            Self::Left,
            Self::Top,
            Self::TopRight,
            Self::TopLeft,
            Self::AvgAvgLtrT,
            Self::AvgLTl,
            Self::AvgLT,
            Self::AvgTlT,
            Self::AvgTTr,
            Self::AvgAvgLTlAvgTTr,
            Self::Select,
            Self::ClampAddSubtractFull,
            Self::ClampAddSubtractHalf,
        ]
    }

    /// Converts a raw mode number into a mode.
    ///
    /// Values `0..=12` map to the variant with that discriminant; every other
    /// value (13 and above) maps to [`PredictorMode::ClampAddSubtractHalf`].
    pub const fn from_u8(val: u8) -> Self {
        // `all()` is ordered by discriminant, so it doubles as a lookup table
        // whose last slot is also the fallback.
        let table = Self::all();
        let fallback = table.len() - 1;
        let wanted = val as usize;
        if wanted < fallback {
            table[wanted]
        } else {
            table[fallback]
        }
    }
}
/// Applies the subtract-green transform to `pixels` in place.
///
/// The call is handed to `incant!` with the base name
/// `apply_subtract_green_impl` and the tier list `[v1, neon, wasm128, scalar]`;
/// presumably the macro selects the matching `apply_subtract_green_impl_*`
/// function for the current target (confirm against the archmage docs).
pub fn apply_subtract_green(pixels: &mut [u32]) {
incant!(
apply_subtract_green_impl(pixels),
[v1, neon, wasm128, scalar]
);
}
/// Portable subtract-green: for every pixel, red and blue each have green
/// subtracted (wrapping); alpha and green are written back unchanged.
fn apply_subtract_green_impl_scalar(_token: ScalarToken, pixels: &mut [u32]) {
    for slot in pixels {
        let argb = *slot;
        let green = argb_green(argb);
        *slot = make_argb(
            argb_alpha(argb),
            argb_red(argb).wrapping_sub(green),
            green,
            argb_blue(argb).wrapping_sub(green),
        );
    }
}
/// x86-64 variant of the subtract-green dispatch; forwards the token and the
/// slice to `apply_subtract_green_sse2`.
#[cfg(target_arch = "x86_64")]
#[arcane]
fn apply_subtract_green_impl_v1(_token: X64V1Token, pixels: &mut [u32]) {
apply_subtract_green_sse2(_token, pixels);
}
/// aarch64 variant of the subtract-green dispatch. No NEON code is used here:
/// the call is forwarded to the scalar implementation.
#[cfg(target_arch = "aarch64")]
fn apply_subtract_green_impl_neon(_token: NeonToken, pixels: &mut [u32]) {
apply_subtract_green_impl_scalar(ScalarToken, pixels);
}
/// wasm32 variant of the subtract-green dispatch. No SIMD code is used here:
/// the call is forwarded to the scalar implementation.
#[cfg(target_arch = "wasm32")]
fn apply_subtract_green_impl_wasm128(_token: Wasm128Token, pixels: &mut [u32]) {
apply_subtract_green_impl_scalar(ScalarToken, pixels);
}
/// SSE2 subtract-green: handles four pixels per iteration, then finishes the
/// remaining (fewer than four) pixels with the same per-channel arithmetic as
/// the scalar implementation.
#[cfg(target_arch = "x86_64")]
#[rite]
fn apply_subtract_green_sse2(_token: X64V1Token, pixels: &mut [u32]) {
let len = pixels.len();
// Largest multiple of four not exceeding `len`.
let simd_len = len & !3;
let mut i = 0;
while i < simd_len {
// `i + 4 <= len` holds here, so a four-element chunk always exists.
let chunk = pixels[i..].first_chunk_mut::<4>().unwrap();
let inp = simd_mem::_mm_loadu_si128(&*chunk);
// Shift every 16-bit lane right by 8: the low byte of each lane now holds
// alpha (upper lane of a pixel) or green (lower lane of a pixel).
let ag = _mm_srli_epi16(inp, 8);
// 0xA0 selects 16-bit words 0,0,2,2 in each half, copying the green lane
// of every pixel over its alpha lane.
let b = _mm_shufflelo_epi16(ag, 0xA0); let c = _mm_shufflehi_epi16(b, 0xA0);
// Byte-wise wrapping subtract: red and blue lose green; alpha and green
// have zero subtracted.
let out = _mm_sub_epi8(inp, c);
simd_mem::_mm_storeu_si128(chunk, out);
i += 4;
}
// Scalar tail for the last `len % 4` pixels.
while i < len {
let pixel = pixels[i];
let a = argb_alpha(pixel);
let r = argb_red(pixel);
let g = argb_green(pixel);
let b = argb_blue(pixel);
let new_r = r.wrapping_sub(g);
let new_b = b.wrapping_sub(g);
pixels[i] = make_argb(a, new_r, g, new_b);
i += 1;
}
}
/// Computes the predicted ARGB value for `mode` from the four neighbours of
/// the current pixel.
#[inline(always)]
fn predict(mode: PredictorMode, left: u32, top: u32, top_left: u32, top_right: u32) -> u32 {
    match mode {
        // Constant and single-neighbour predictors.
        PredictorMode::Black => 0xff000000,
        PredictorMode::Left => left,
        PredictorMode::Top => top,
        PredictorMode::TopRight => top_right,
        PredictorMode::TopLeft => top_left,
        // Two-neighbour averages.
        PredictorMode::AvgLTl => average2(left, top_left),
        PredictorMode::AvgLT => average2(left, top),
        PredictorMode::AvgTlT => average2(top_left, top),
        PredictorMode::AvgTTr => average2(top, top_right),
        // Nested averages.
        PredictorMode::AvgAvgLtrT => {
            let left_and_top_right = average2(left, top_right);
            average2(left_and_top_right, top)
        }
        PredictorMode::AvgAvgLTlAvgTTr => {
            let left_side = average2(left, top_left);
            let top_side = average2(top, top_right);
            average2(left_side, top_side)
        }
        // Gradient-based predictors.
        PredictorMode::Select => select(left, top, top_left),
        PredictorMode::ClampAddSubtractFull => clamp_add_subtract_full(left, top, top_left),
        PredictorMode::ClampAddSubtractHalf => clamp_add_subtract_half(left, top, top_left),
    }
}
/// Per-channel average of two ARGB values, rounding down.
#[inline]
fn average2(a: u32, b: u32) -> u32 {
    // Widen to u16 so the sum of two bytes cannot overflow.
    let mean = |p: u8, q: u8| -> u8 { ((u16::from(p) + u16::from(q)) / 2) as u8 };
    make_argb(
        mean(argb_alpha(a), argb_alpha(b)),
        mean(argb_red(a), argb_red(b)),
        mean(argb_green(a), argb_green(b)),
        mean(argb_blue(a), argb_blue(b)),
    )
}
/// Select predictor: returns `left` when `top` is closer to `top_left` than
/// `left` is (summed per-channel absolute differences), otherwise `top`.
#[inline]
fn select(left: u32, top: u32, top_left: u32) -> u32 {
    // Sum of absolute channel differences between `color` and `top_left`.
    let distance_to_corner = |color: u32| -> u16 {
        u16::from(argb_alpha(color).abs_diff(argb_alpha(top_left)))
            + u16::from(argb_red(color).abs_diff(argb_red(top_left)))
            + u16::from(argb_green(color).abs_diff(argb_green(top_left)))
            + u16::from(argb_blue(color).abs_diff(argb_blue(top_left)))
    };
    // The score for predicting with `left` is measured on `top`, and vice versa.
    let predict_left = distance_to_corner(top);
    let predict_top = distance_to_corner(left);
    if predict_left < predict_top {
        left
    } else {
        top
    }
}
/// Saturates `val` into the `0..=255` range and returns it as a byte.
#[inline]
fn clamp(val: i16) -> u8 {
    let bounded = val.max(0).min(255);
    bounded as u8
}
/// Per-channel `left + top - top_left`, clamped to `0..=255`.
#[inline]
fn clamp_add_subtract_full(left: u32, top: u32, top_left: u32) -> u32 {
    let channel = |l: u8, t: u8, tl: u8| clamp(i16::from(l) + i16::from(t) - i16::from(tl));
    make_argb(
        channel(argb_alpha(left), argb_alpha(top), argb_alpha(top_left)),
        channel(argb_red(left), argb_red(top), argb_red(top_left)),
        channel(argb_green(left), argb_green(top), argb_green(top_left)),
        channel(argb_blue(left), argb_blue(top), argb_blue(top_left)),
    )
}
/// Per-channel `avg + (avg - top_left) / 2`, clamped to `0..=255`, where
/// `avg` is the truncated mean of `left` and `top`.
#[inline]
fn clamp_add_subtract_half(left: u32, top: u32, top_left: u32) -> u32 {
    let channel = |l: u8, t: u8, tl: u8| -> u8 {
        let avg = (i16::from(l) + i16::from(t)) / 2;
        // Integer division truncates toward zero, also for negative differences.
        let extrapolated = avg + (avg - i16::from(tl)) / 2;
        extrapolated.clamp(0, 255) as u8
    };
    make_argb(
        channel(argb_alpha(left), argb_alpha(top), argb_alpha(top_left)),
        channel(argb_red(left), argb_red(top), argb_red(top_left)),
        channel(argb_green(left), argb_green(top), argb_green(top_left)),
        channel(argb_blue(left), argb_blue(top), argb_blue(top_left)),
    )
}
/// Per-channel wrapping difference `pixel - pred`.
#[inline(always)]
fn residual(pixel: u32, pred: u32) -> u32 {
    make_argb(
        argb_alpha(pixel).wrapping_sub(argb_alpha(pred)),
        argb_red(pixel).wrapping_sub(argb_red(pred)),
        argb_green(pixel).wrapping_sub(argb_green(pred)),
        argb_blue(pixel).wrapping_sub(argb_blue(pred)),
    )
}
/// Chooses a predictor mode for every `2^size_bits`-sized block and replaces
/// `pixels` in place with prediction residuals.
///
/// Returns the predictor image: one `u32` per block, laid out row-major with
/// `subsample_size(width, size_bits)` entries per row, each holding
/// `0xff000000 | (mode << 8)`.
///
/// When `max_quantization > 1` the residuals of interior pixels are produced
/// by `super::near_lossless::near_lossless_residual`, and the reconstructed
/// value it returns is written back so later predictions use it. Otherwise
/// exact residuals are computed.
///
/// `pixels` is indexed as `y * width + x`; indexing panics if it holds fewer
/// than `width * height` entries, and the near-lossless path's final
/// `copy_from_slice` panics unless the lengths are equal.
pub fn apply_predictor_transform(
pixels: &mut [u32],
width: usize,
height: usize,
size_bits: u8,
max_quantization: u32,
used_subtract_green: bool,
) -> Vec<u32> {
let block_size = 1usize << size_bits;
let blocks_x = subsample_size(width as u32, size_bits) as usize;
let blocks_y = subsample_size(height as u32, size_bits) as usize;
let mut predictor_data = vec![0u32; blocks_x * blocks_y];
// Residual histograms (one per channel) of all blocks decided so far.
let mut accumulated = [[0u32; 256]; 4];
// Pass 1: pick a mode per block, on the still-untouched pixels.
for by in 0..blocks_y {
for bx in 0..blocks_x {
// 0xff matches no real mode, so blocks on the left/top edge get no
// neighbour-agreement bonus.
let left_mode = if bx > 0 {
argb_green(predictor_data[by * blocks_x + bx - 1])
} else {
0xff
};
let above_mode = if by > 0 {
argb_green(predictor_data[(by - 1) * blocks_x + bx])
} else {
0xff
};
let best_mode = choose_best_predictor(
pixels,
width,
height,
bx,
by,
block_size,
&mut accumulated,
left_mode,
above_mode,
);
predictor_data[by * blocks_x + bx] = 0xff000000 | ((best_mode as u32) << 8);
}
}
if max_quantization > 1 {
// Near-lossless path. Max-diff values are gathered for interior rows
// only, before any pixel is modified.
let mut all_max_diffs = vec![0u8; width * height];
for y in 1..height.saturating_sub(1) {
super::near_lossless::max_diffs_for_row(
pixels,
width,
y,
&mut all_max_diffs[y * width..],
used_subtract_green,
);
}
// Residuals go to a side buffer because `pixels` must keep holding
// (reconstructed) colour values while later pixels are predicted.
let mut residuals = vec![0u32; width * height];
for y in 0..height {
for x in 0..width {
let idx = y * width + x;
// First pixel predicts from opaque black, the rest of the first row
// from the left neighbour, the first column from the top neighbour.
let pred = if y == 0 && x == 0 {
0xff000000
} else if y == 0 {
pixels[x - 1]
} else if x == 0 {
pixels[(y - 1) * width]
} else {
let bx = x >> size_bits;
let by = y >> size_bits;
let mode =
PredictorMode::from_u8(argb_green(predictor_data[by * blocks_x + bx]));
let left = pixels[y * width + x - 1];
let top = pixels[(y - 1) * width + x];
let top_left = pixels[(y - 1) * width + x - 1];
// In the last column the "top-right" neighbour is the first pixel
// of the current row.
let top_right = if x + 1 < width {
pixels[(y - 1) * width + x + 1]
} else {
pixels[y * width]
};
predict(mode, left, top, top_left, top_right)
};
let mode_val = if y > 0 && x > 0 {
argb_green(predictor_data[(y >> size_bits) * blocks_x + (x >> size_bits)])
} else {
0 };
// Mode 0 and all border pixels keep exact residuals.
if mode_val == 0 || y == 0 || y == height - 1 || x == 0 || x == width - 1 {
residuals[idx] = residual(pixels[idx], pred);
} else {
let max_diff = all_max_diffs[idx];
let (res, recon) = super::near_lossless::near_lossless_residual(
pixels[idx],
pred,
max_quantization,
max_diff,
used_subtract_green,
);
residuals[idx] = res;
// Later predictions read the reconstructed value, not the original.
pixels[idx] = recon;
}
}
}
pixels.copy_from_slice(&residuals);
} else {
// Lossless path: walk bottom-right to top-left so every neighbour read
// (left, top, top-left, top-right) still holds its original value.
for y in (0..height).rev() {
for x in (0..width).rev() {
let pred = if y == 0 && x == 0 {
0xff000000
} else if y == 0 {
pixels[x - 1]
} else if x == 0 {
pixels[(y - 1) * width]
} else {
let bx = x >> size_bits;
let by = y >> size_bits;
let mode =
PredictorMode::from_u8(argb_green(predictor_data[by * blocks_x + bx]));
let left = pixels[y * width + x - 1];
let top = pixels[(y - 1) * width + x];
let top_left = pixels[(y - 1) * width + x - 1];
let top_right = if x + 1 < width {
pixels[(y - 1) * width + x + 1]
} else {
pixels[y * width]
};
predict(mode, left, top, top_left, top_right)
};
pixels[y * width + x] = residual(pixels[y * width + x], pred);
}
}
}
predictor_data
}
/// Evaluates every predictor mode on block (`bx`, `by`) and returns the one
/// with the lowest `prediction_cost_spatial_histogram` cost.
///
/// Row 0 and column 0 are excluded from the evaluation. If nothing is left
/// after that, `PredictorMode::Black` is returned and `accumulated` is not
/// touched. Otherwise the residual histogram of the winning mode is added to
/// `accumulated`.
fn choose_best_predictor(
    pixels: &[u32],
    width: usize,
    height: usize,
    bx: usize,
    by: usize,
    block_size: usize,
    accumulated: &mut [[u32; 256]; 4],
    left_mode: u8,
    above_mode: u8,
) -> PredictorMode {
    let block_x0 = bx * block_size;
    let block_y0 = by * block_size;
    let last_x = (block_x0 + block_size).min(width);
    let last_y = (block_y0 + block_size).min(height);
    // Skip the first row/column: they never use the block's mode.
    let first_x = block_x0.max(1);
    let first_y = block_y0.max(1);
    if first_x >= last_x || first_y >= last_y {
        return PredictorMode::Black;
    }

    let mut winner = PredictorMode::Black;
    let mut winner_cost = i64::MAX;
    let mut winner_histo = [[0u32; 256]; 4];

    for candidate in PredictorMode::all() {
        let mut histo = [[0u32; 256]; 4];
        for y in first_y..last_y {
            let row = y * width;
            let row_above = (y - 1) * width;
            for x in first_x..last_x {
                let current = pixels[row + x];
                let left = pixels[row + x - 1];
                let top = pixels[row_above + x];
                let top_left = pixels[row_above + x - 1];
                // Last column: the first pixel of the current row stands in.
                let top_right = if x + 1 < width {
                    pixels[row_above + x + 1]
                } else {
                    pixels[row]
                };
                let predicted = predict(candidate, left, top, top_left, top_right);
                update_histo(&mut histo, residual(current, predicted));
            }
        }

        let candidate_cost = prediction_cost_spatial_histogram(
            accumulated,
            &histo,
            candidate as u8,
            left_mode,
            above_mode,
        );
        // Strict comparison: on ties the earliest mode wins.
        if candidate_cost < winner_cost {
            winner_cost = candidate_cost;
            winner = candidate;
            winner_histo = histo;
        }
    }

    for (total_channel, best_channel) in accumulated.iter_mut().zip(winner_histo.iter()) {
        for (total, &count) in total_channel.iter_mut().zip(best_channel.iter()) {
            *total += count;
        }
    }
    winner
}
/// The three per-tile multipliers of the cross-color transform.
///
/// Each field is stored as a raw byte; the transform code reinterprets it as
/// `i8` before use. `Default` yields all zeros.
#[derive(Debug, Clone, Copy, Default)]
pub struct CrossColorMultipliers {
    /// Multiplier applied to green when adjusting red.
    pub green_to_red: u8,
    /// Multiplier applied to green when adjusting blue.
    pub green_to_blue: u8,
    /// Multiplier applied to red when adjusting blue.
    pub red_to_blue: u8,
}
/// Number of fractional bits in the fixed-point cost values of this module.
const LOG_2_PRECISION_BITS: u32 = 23;

/// Cost reduction (15 in fixed point) granted to a predictor mode for each of
/// the left/above blocks that already uses the same mode.
const SPATIAL_PREDICTOR_BIAS: i64 = 15_i64 << LOG_2_PRECISION_BITS;
/// Product of a signed multiplier and a signed colour value, scaled down by
/// 32 with an arithmetic right shift (rounds toward negative infinity).
#[inline]
fn color_transform_delta(color_pred: i8, color: i8) -> i32 {
    let product = i32::from(color_pred) * i32::from(color);
    product >> 5
}
/// Local shorthand for `super::entropy::fast_slog2_public`.
// NOTE(review): presumably returns `v * log2(v)` in fixed point with
// `LOG_2_PRECISION_BITS` fractional bits — confirm in the entropy module.
#[inline]
fn fast_slog2(v: u32) -> u64 {
super::entropy::fast_slog2_public(v)
}
/// Entropy-style cost of histogram `x` together with the element-wise sum
/// `x + y`; see `combined_shannon_entropy_impl_scalar` for the exact formula.
///
/// Handed to `incant!` with the tier list `[v1, neon, wasm128, scalar]`;
/// presumably the macro selects the matching `combined_shannon_entropy_impl_*`
/// function for the current target (confirm against the archmage docs).
fn combined_shannon_entropy(x: &[u32; 256], y: &[u32; 256]) -> u64 {
incant!(
combined_shannon_entropy_impl(x, y),
[v1, neon, wasm128, scalar]
)
}
/// Scalar reference for the combined entropy cost.
///
/// Returns `slog2(sum(x)) + slog2(sum(x + y)) - sum(slog2(x[i]) + slog2(x[i] + y[i]))`,
/// where `slog2` is `fast_slog2` and bins in which both counts are zero are
/// skipped.
fn combined_shannon_entropy_impl_scalar(
    _token: ScalarToken,
    x: &[u32; 256],
    y: &[u32; 256],
) -> u64 {
    let mut per_bin_terms: u64 = 0;
    let mut total_x: u32 = 0;
    let mut total_xy: u32 = 0;
    for (&count_x, &count_y) in x.iter().zip(y.iter()) {
        if count_x != 0 {
            let combined = count_x + count_y;
            total_x += count_x;
            per_bin_terms += fast_slog2(count_x);
            total_xy += combined;
            per_bin_terms += fast_slog2(combined);
        } else if count_y != 0 {
            // Only the combined histogram has this bin populated.
            total_xy += count_y;
            per_bin_terms += fast_slog2(count_y);
        }
    }
    fast_slog2(total_x) + fast_slog2(total_xy) - per_bin_terms
}
/// x86-64 variant of the combined-entropy dispatch; forwards to
/// `combined_shannon_entropy_sse2`.
#[cfg(target_arch = "x86_64")]
#[arcane]
fn combined_shannon_entropy_impl_v1(_token: X64V1Token, x: &[u32; 256], y: &[u32; 256]) -> u64 {
combined_shannon_entropy_sse2(_token, x, y)
}
/// aarch64 variant of the combined-entropy dispatch; forwards to the scalar
/// implementation.
#[cfg(target_arch = "aarch64")]
fn combined_shannon_entropy_impl_neon(_token: NeonToken, x: &[u32; 256], y: &[u32; 256]) -> u64 {
combined_shannon_entropy_impl_scalar(ScalarToken, x, y)
}
/// wasm32 variant of the combined-entropy dispatch; forwards to the scalar
/// implementation.
#[cfg(target_arch = "wasm32")]
fn combined_shannon_entropy_impl_wasm128(
_token: Wasm128Token,
x: &[u32; 256],
y: &[u32; 256],
) -> u64 {
combined_shannon_entropy_impl_scalar(ScalarToken, x, y)
}
/// SSE2 variant of the combined entropy cost.
///
/// SIMD is used only to find, sixteen bins at a time, which bins are non-zero
/// in `x` or `y`; the per-bin `fast_slog2` terms are then accumulated with
/// scalar code for those bins only.
// NOTE(review): the signed saturating packs treat counts >= 2^31 as negative,
// so such bins would be skipped — presumably counts never get that large.
#[cfg(target_arch = "x86_64")]
#[rite]
fn combined_shannon_entropy_sse2(_token: X64V1Token, x: &[u32; 256], y: &[u32; 256]) -> u64 {
let mut retval: u64 = 0;
let mut sum_x: u32 = 0;
let mut sum_xy: u32 = 0;
let zero = _mm_setzero_si128();
let mut i = 0usize;
while i < 256 {
// Load sixteen consecutive counts from each histogram (4 x 4 lanes).
let x0 = simd_mem::_mm_loadu_si128(x[i..].first_chunk::<4>().unwrap());
let x1 = simd_mem::_mm_loadu_si128(x[i + 4..].first_chunk::<4>().unwrap());
let x2 = simd_mem::_mm_loadu_si128(x[i + 8..].first_chunk::<4>().unwrap());
let x3 = simd_mem::_mm_loadu_si128(x[i + 12..].first_chunk::<4>().unwrap());
let y0 = simd_mem::_mm_loadu_si128(y[i..].first_chunk::<4>().unwrap());
let y1 = simd_mem::_mm_loadu_si128(y[i + 4..].first_chunk::<4>().unwrap());
let y2 = simd_mem::_mm_loadu_si128(y[i + 8..].first_chunk::<4>().unwrap());
let y3 = simd_mem::_mm_loadu_si128(y[i + 12..].first_chunk::<4>().unwrap());
// Saturating narrow 32 -> 16 -> 8 bits: one signed byte per bin, still
// positive for any positive count.
let x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1), _mm_packs_epi32(x2, x3));
let y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1), _mm_packs_epi32(y2, y3));
// Bit j of `mx` is set when x[i + j] > 0; `my` additionally covers y.
let mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero)) as u32;
let mut my = (_mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) as u32) | mx;
while my != 0 {
let j = my.trailing_zeros() as usize;
if (mx >> j) & 1 != 0 {
let xv = x[i + j];
sum_x += xv;
retval += fast_slog2(xv);
}
let xy = x[i + j] + y[i + j];
sum_xy += xy;
retval += fast_slog2(xy);
// Clear the lowest set bit and move on to the next populated bin.
my &= my - 1; }
i += 16;
}
fast_slog2(sum_x) + fast_slog2(sum_xy) - retval
}
/// Integer division of `a` by `b`, rounded to the nearest value.
///
/// Half of the divisor (`b / 2`, truncated) is added to `a` when both operands
/// have the same sign and subtracted otherwise, before the truncating divide.
#[inline]
fn div_round(a: i64, b: i64) -> i64 {
    let half_divisor = b / 2;
    let signs_differ = (a < 0) != (b < 0);
    let biased = if signs_differ {
        a - half_divisor
    } else {
        a + half_divisor
    };
    biased / b
}
/// Negative cost bias rewarding histograms concentrated around zero.
///
/// `counts[0]` is weighted by `weight_0`. Symbols `1..16` and their mirror
/// images `255, 254, ..` (read as small negative values) are weighted by
/// `exp_val / 100`, with `exp_val` decaying by 6/10 per step. All arithmetic
/// is fixed point with `LOG_2_PRECISION_BITS` fractional bits; the result is
/// the negated total divided by 10.
fn prediction_cost_bias(counts: &[u32; 256], weight_0: u64, mut exp_val: u64) -> i64 {
    const SIGNIFICANT_SYMBOLS: usize = 256 >> 4;
    // Decay per step, scaled by 10 (i.e. 0.6).
    const EXP_DECAY_FACTOR: u64 = 6;

    exp_val <<= LOG_2_PRECISION_BITS;
    let mut bits = (weight_0 * u64::from(counts[0])) << LOG_2_PRECISION_BITS;

    // Pair counts[i] with counts[256 - i] for i in 1..SIGNIFICANT_SYMBOLS.
    let small_positive = counts[1..SIGNIFICANT_SYMBOLS].iter();
    let small_negative = counts.iter().rev();
    for (&pos, &neg) in small_positive.zip(small_negative) {
        let pair_total = u64::from(pos) + u64::from(neg);
        bits += div_round((exp_val * pair_total) as i64, 100) as u64;
        exp_val = div_round((EXP_DECAY_FACTOR * exp_val) as i64, 10) as u64;
    }
    -div_round(bits as i64, 10)
}
/// Adds one sample per channel to `histo`: row 0 counts bits 24..31 of
/// `argb`, row 1 bits 16..23, row 2 bits 8..15 and row 3 bits 0..7.
#[inline(always)]
fn update_histo(histo: &mut [[u32; 256]; 4], argb: u32) {
    // Big-endian byte order yields the channels from most to least significant.
    for (channel, value) in histo.iter_mut().zip(argb.to_be_bytes()) {
        channel[usize::from(value)] += 1;
    }
}
/// Cost of using `mode` for a tile whose residual histograms are `tile`, given
/// the histograms `accumulated` from earlier tiles.
///
/// For each channel, adds the zero-concentration bias of the tile and the
/// combined entropy of tile and accumulated histograms. The total is lowered
/// by `SPATIAL_PREDICTOR_BIAS` for each of `left_mode` / `above_mode` that
/// equals `mode`.
fn prediction_cost_spatial_histogram(
    accumulated: &[[u32; 256]; 4],
    tile: &[[u32; 256]; 4],
    mode: u8,
    left_mode: u8,
    above_mode: u8,
) -> i64 {
    const K_EXP_VALUE: u64 = 94;

    let mut cost: i64 = 0;
    for (tile_channel, total_channel) in tile.iter().zip(accumulated.iter()) {
        cost += prediction_cost_bias(tile_channel, 1, K_EXP_VALUE);
        cost += combined_shannon_entropy(tile_channel, total_channel) as i64;
    }
    for neighbour_mode in [left_mode, above_mode] {
        if neighbour_mode == mode {
            cost -= SPATIAL_PREDICTOR_BIAS;
        }
    }
    cost
}
/// Cost of a cross-color candidate: combined entropy of the tile histogram
/// `counts` with `accumulated`, plus the zero-concentration bias of `counts`
/// (weight 3, initial exponent value 240).
fn prediction_cost_cross_color(accumulated: &[u32; 256], counts: &[u32; 256]) -> i64 {
    const K_EXP_VALUE: u64 = 240;
    let entropy = combined_shannon_entropy(counts, accumulated) as i64;
    let bias = prediction_cost_bias(counts, 3, K_EXP_VALUE);
    entropy + bias
}
/// Adds to `histo` the red values that the tile `[start_x, end_x) x
/// [start_y, end_y)` would have after applying `green_to_red`.
fn collect_color_red_transforms(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    green_to_red: u8,
    histo: &mut [u32; 256],
) {
    let multiplier = green_to_red as i8;
    for y in start_y..end_y {
        let row_base = y * width;
        for pixel in (start_x..end_x).map(|x| argb[row_base + x]) {
            let green = (pixel >> 8) as u8 as i8;
            // The alpha bits above red are discarded by the 0xff mask below.
            let transformed = (pixel >> 16) as i32 - color_transform_delta(multiplier, green);
            histo[(transformed & 0xff) as usize] += 1;
        }
    }
}
/// Adds to `histo` the blue values that the tile `[start_x, end_x) x
/// [start_y, end_y)` would have after applying `green_to_blue` and
/// `red_to_blue`.
fn collect_color_blue_transforms(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    green_to_blue: u8,
    red_to_blue: u8,
    histo: &mut [u32; 256],
) {
    let from_green = green_to_blue as i8;
    let from_red = red_to_blue as i8;
    for y in start_y..end_y {
        let row_base = y * width;
        for pixel in (start_x..end_x).map(|x| argb[row_base + x]) {
            let green = (pixel >> 8) as u8 as i8;
            let red = (pixel >> 16) as u8 as i8;
            let transformed = (pixel as i32 & 0xff)
                - color_transform_delta(from_green, green)
                - color_transform_delta(from_red, red);
            histo[(transformed & 0xff) as usize] += 1;
        }
    }
}
/// Cost of using `green_to_red` on the given tile.
///
/// Starts from `prediction_cost_cross_color` of the transformed red histogram
/// and subtracts `3 << LOG_2_PRECISION_BITS` for each of: matching the left
/// tile (`prev_x`), matching the tile above (`prev_y`), and being zero.
fn get_prediction_cost_red(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    prev_x: &CrossColorMultipliers,
    prev_y: &CrossColorMultipliers,
    green_to_red: u8,
    accumulated_red_histo: &[u32; 256],
) -> i64 {
    let mut red_histo = [0u32; 256];
    collect_color_red_transforms(
        argb,
        width,
        start_x,
        start_y,
        end_x,
        end_y,
        green_to_red,
        &mut red_histo,
    );
    let base_cost = prediction_cost_cross_color(accumulated_red_histo, &red_histo);

    let bonus = 3i64 << LOG_2_PRECISION_BITS;
    let rewards = [
        green_to_red == prev_x.green_to_red,
        green_to_red == prev_y.green_to_red,
        green_to_red == 0,
    ];
    rewards
        .iter()
        .filter(|&&earned| earned)
        .fold(base_cost, |cost, _| cost - bonus)
}
/// Cost of using (`green_to_blue`, `red_to_blue`) on the given tile.
///
/// Starts from `prediction_cost_cross_color` of the transformed blue histogram
/// and subtracts `3 << LOG_2_PRECISION_BITS` for every multiplier that matches
/// the left tile (`prev_x`), matches the tile above (`prev_y`), or is zero.
fn get_prediction_cost_blue(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    prev_x: &CrossColorMultipliers,
    prev_y: &CrossColorMultipliers,
    green_to_blue: u8,
    red_to_blue: u8,
    accumulated_blue_histo: &[u32; 256],
) -> i64 {
    let mut blue_histo = [0u32; 256];
    collect_color_blue_transforms(
        argb,
        width,
        start_x,
        start_y,
        end_x,
        end_y,
        green_to_blue,
        red_to_blue,
        &mut blue_histo,
    );
    let base_cost = prediction_cost_cross_color(accumulated_blue_histo, &blue_histo);

    let bonus = 3i64 << LOG_2_PRECISION_BITS;
    let rewards = [
        green_to_blue == prev_x.green_to_blue,
        green_to_blue == prev_y.green_to_blue,
        red_to_blue == prev_x.red_to_blue,
        red_to_blue == prev_y.red_to_blue,
        green_to_blue == 0,
        red_to_blue == 0,
    ];
    rewards
        .iter()
        .filter(|&&earned| earned)
        .fold(base_cost, |cost, _| cost - bonus)
}
/// Searches for the `green_to_red` multiplier with the lowest cost for a tile.
///
/// Starts at 0 and, for `4 + ((7 * quality) >> 8)` rounds, tries the current
/// best minus and plus a step of `32 >> round`, keeping any strict
/// improvement. Returns the low byte of the best value found.
fn get_best_green_to_red(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    prev_x: &CrossColorMultipliers,
    prev_y: &CrossColorMultipliers,
    quality: u8,
    accumulated_red_histo: &[u32; 256],
) -> u8 {
    let cost_of = |candidate: i32| -> i64 {
        get_prediction_cost_red(
            argb,
            width,
            start_x,
            start_y,
            end_x,
            end_y,
            prev_x,
            prev_y,
            candidate as u8,
            accumulated_red_histo,
        )
    };

    let rounds = 4 + ((7 * i32::from(quality)) >> 8);
    let mut best: i32 = 0;
    let mut best_cost = cost_of(best);
    for round in 0..rounds {
        let step: i32 = 32 >> round;
        // Negative direction first; a hit moves the centre for the positive try.
        for signed_step in [-step, step] {
            let candidate = signed_step + best;
            let candidate_cost = cost_of(candidate);
            if candidate_cost < best_cost {
                best_cost = candidate_cost;
                best = candidate;
            }
        }
    }
    (best & 0xff) as u8
}
/// Searches for the (`green_to_blue`, `red_to_blue`) pair with the lowest cost
/// for a tile.
///
/// Starting from (0, 0), each iteration probes neighbours of the current best
/// at a step taken from a fixed schedule. `quality < 25` runs one iteration
/// over the four axis-aligned neighbours only; `quality > 50` runs up to seven
/// iterations; anything in between runs four. The search stops early once the
/// step is 2 and the best pair is still (0, 0). Returns the low bytes of the
/// best pair.
fn get_best_green_red_to_blue(
    argb: &[u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    prev_x: &CrossColorMultipliers,
    prev_y: &CrossColorMultipliers,
    quality: u8,
    accumulated_blue_histo: &[u32; 256],
) -> (u8, u8) {
    // (green_to_blue, red_to_blue) directions: axis-aligned first, then diagonals.
    const OFFSETS: [[i8; 2]; 8] = [
        [0, -1],
        [0, 1],
        [-1, 0],
        [1, 0],
        [-1, -1],
        [-1, 1],
        [1, -1],
        [1, 1],
    ];
    const DELTA_LUT: [i32; 7] = [16, 16, 8, 4, 2, 2, 2];

    let low_quality = quality < 25;
    let max_iters: usize = match quality {
        0..=24 => 1,
        25..=50 => 4,
        _ => 7,
    };
    // Low quality only looks at the four axis-aligned directions.
    let directions = if low_quality { 4 } else { OFFSETS.len() };

    let cost_of = |g2b: i32, r2b: i32| -> i64 {
        get_prediction_cost_blue(
            argb,
            width,
            start_x,
            start_y,
            end_x,
            end_y,
            prev_x,
            prev_y,
            g2b as u8,
            r2b as u8,
            accumulated_blue_histo,
        )
    };

    let mut best_g2b: i32 = 0;
    let mut best_r2b: i32 = 0;
    let mut best_cost = cost_of(0, 0);
    for &step in DELTA_LUT.iter().take(max_iters) {
        for &[dir_g2b, dir_r2b] in OFFSETS.iter().take(directions) {
            let cand_g2b = i32::from(dir_g2b) * step + best_g2b;
            let cand_r2b = i32::from(dir_r2b) * step + best_r2b;
            let cand_cost = cost_of(cand_g2b, cand_r2b);
            if cand_cost < best_cost {
                best_cost = cand_cost;
                best_g2b = cand_g2b;
                best_r2b = cand_r2b;
            }
        }
        // Still at the origin at the finest step: further rounds cannot help.
        if step == 2 && best_g2b == 0 && best_r2b == 0 {
            break;
        }
    }
    ((best_g2b & 0xff) as u8, (best_r2b & 0xff) as u8)
}
/// Chooses cross-color multipliers for every `2^transform_bits`-sized tile and
/// applies them to `pixels` in place.
///
/// Returns the transform image: one `u32` per tile, row-major, encoded as
/// `0xff000000 | (red_to_blue << 16) | (green_to_blue << 8) | green_to_red`.
///
/// Tiles are processed in raster order. Each tile's search is biased towards
/// the multipliers of the previously processed tile (`prev_x`) and of the tile
/// directly above (`prev_y`), and towards the red/blue histograms accumulated
/// from already transformed tiles.
pub fn apply_cross_color_transform(
pixels: &mut [u32],
width: usize,
height: usize,
transform_bits: u8,
quality: u8,
) -> Vec<u32> {
let block_size = 1usize << transform_bits;
let tiles_x = subsample_size(width as u32, transform_bits) as usize;
let tiles_y = subsample_size(height as u32, transform_bits) as usize;
let mut transform_data = vec![0u32; tiles_x * tiles_y];
let mut accumulated_red_histo = [0u32; 256];
let mut accumulated_blue_histo = [0u32; 256];
// `prev_x` is not reset at the start of a row: for the first tile of a row
// it still holds the multipliers of the last tile of the previous row.
let mut prev_x = CrossColorMultipliers::default();
let mut prev_y = CrossColorMultipliers::default();
for ty in 0..tiles_y {
for tx in 0..tiles_x {
let start_x = tx * block_size;
let start_y = ty * block_size;
let end_x = (start_x + block_size).min(width);
let end_y = (start_y + block_size).min(height);
if ty != 0 {
// Decode the multipliers of the tile directly above.
let above_code = transform_data[(ty - 1) * tiles_x + tx];
prev_y.green_to_red = above_code as u8;
prev_y.green_to_blue = (above_code >> 8) as u8;
prev_y.red_to_blue = (above_code >> 16) as u8;
}
// Both searches run on the tile's pixels before the tile is transformed.
let best_g2r = get_best_green_to_red(
pixels,
width,
start_x,
start_y,
end_x,
end_y,
&prev_x,
&prev_y,
quality,
&accumulated_red_histo,
);
let (best_g2b, best_r2b) = get_best_green_red_to_blue(
pixels,
width,
start_x,
start_y,
end_x,
end_y,
&prev_x,
&prev_y,
quality,
&accumulated_blue_histo,
);
prev_x = CrossColorMultipliers {
green_to_red: best_g2r,
green_to_blue: best_g2b,
red_to_blue: best_r2b,
};
let color_code = 0xFF000000u32
| ((best_r2b as u32) << 16)
| ((best_g2b as u32) << 8)
| (best_g2r as u32);
transform_data[ty * tiles_x + tx] = color_code;
apply_cross_color_tile(pixels, width, start_x, start_y, end_x, end_y, &prev_x);
// Fold the transformed tile into the running histograms, skipping pixels
// that merely repeat their neighbours.
for y in start_y..end_y {
for x in start_x..end_x {
let ix = y * width + x;
let pix = pixels[ix];
// Same value as the two preceding pixels.
if ix >= 2 && pix == pixels[ix - 2] && pix == pixels[ix - 1] {
continue;
}
// This pixel and its two predecessors equal the pixels one row above.
if ix >= width + 2
&& pixels[ix - 2] == pixels[ix - width - 2]
&& pixels[ix - 1] == pixels[ix - width - 1]
&& pix == pixels[ix - width]
{
continue;
}
accumulated_red_histo[((pix >> 16) & 0xff) as usize] += 1;
accumulated_blue_histo[(pix & 0xff) as usize] += 1;
}
}
}
}
transform_data
}
/// Applies the cross-color multipliers `m` in place to the pixels of the tile
/// `[start_x, end_x) x [start_y, end_y)` of an image `width` pixels wide.
///
/// Handed to `incant!` with the tier list `[v1, neon, wasm128, scalar]`;
/// presumably the macro selects the matching `apply_cross_color_tile_impl_*`
/// function for the current target (confirm against the archmage docs).
#[inline]
fn apply_cross_color_tile(
pixels: &mut [u32],
width: usize,
start_x: usize,
start_y: usize,
end_x: usize,
end_y: usize,
m: &CrossColorMultipliers,
) {
incant!(
apply_cross_color_tile_impl(pixels, width, start_x, start_y, end_x, end_y, m),
[v1, neon, wasm128, scalar]
);
}
/// Portable cross-color transform of one tile.
///
/// For every pixel: red has `delta(green_to_red, green)` subtracted, blue has
/// `delta(green_to_blue, green) + delta(red_to_blue, red)` subtracted (using
/// the original red); both wrap to 8 bits. Alpha and green are kept.
fn apply_cross_color_tile_impl_scalar(
    _token: ScalarToken,
    pixels: &mut [u32],
    width: usize,
    start_x: usize,
    start_y: usize,
    end_x: usize,
    end_y: usize,
    m: &CrossColorMultipliers,
) {
    let green_to_red = m.green_to_red as i8;
    let green_to_blue = m.green_to_blue as i8;
    let red_to_blue = m.red_to_blue as i8;

    for y in start_y..end_y {
        let row_base = y * width;
        for idx in (row_base + start_x)..(row_base + end_x) {
            let argb = pixels[idx];
            // Channels reinterpreted as signed values for the multiplications.
            let green = (argb >> 8) as u8 as i8;
            let red = (argb >> 16) as u8 as i8;

            let red_out = (((red as i32) & 0xff) - color_transform_delta(green_to_red, green)) & 0xff;
            let blue_out = ((argb as i32 & 0xff)
                - color_transform_delta(green_to_blue, green)
                - color_transform_delta(red_to_blue, red))
                & 0xff;

            pixels[idx] = (argb & 0xff00ff00u32) | ((red_out as u32) << 16) | (blue_out as u32);
        }
    }
}
/// Places `x` in the high byte of a 16-bit word, reinterprets the word as
/// signed and arithmetic-shifts it right by 5 (giving `(x as i8) * 8`).
#[cfg(target_arch = "x86_64")]
#[inline]
fn cst_5b(x: u8) -> i16 {
    let high_byte_word = i16::from_be_bytes([x, 0]);
    high_byte_word >> 5
}
/// aarch64 variant of the cross-color tile dispatch; forwards every argument
/// to the scalar implementation.
#[cfg(target_arch = "aarch64")]
fn apply_cross_color_tile_impl_neon(
_token: NeonToken,
pixels: &mut [u32],
width: usize,
start_x: usize,
start_y: usize,
end_x: usize,
end_y: usize,
m: &CrossColorMultipliers,
) {
apply_cross_color_tile_impl_scalar(
ScalarToken,
pixels,
width,
start_x,
start_y,
end_x,
end_y,
m,
);
}
/// wasm32 variant of the cross-color tile dispatch; forwards every argument
/// to the scalar implementation.
#[cfg(target_arch = "wasm32")]
fn apply_cross_color_tile_impl_wasm128(
_token: Wasm128Token,
pixels: &mut [u32],
width: usize,
start_x: usize,
start_y: usize,
end_x: usize,
end_y: usize,
m: &CrossColorMultipliers,
) {
apply_cross_color_tile_impl_scalar(
ScalarToken,
pixels,
width,
start_x,
start_y,
end_x,
end_y,
m,
);
}
/// x86-64 variant of the cross-color tile dispatch; forwards every argument
/// to `apply_cross_color_tile_sse2`.
#[cfg(target_arch = "x86_64")]
#[arcane]
fn apply_cross_color_tile_impl_v1(
_token: X64V1Token,
pixels: &mut [u32],
width: usize,
start_x: usize,
start_y: usize,
end_x: usize,
end_y: usize,
m: &CrossColorMultipliers,
) {
apply_cross_color_tile_sse2(_token, pixels, width, start_x, start_y, end_x, end_y, m);
}
/// SSE2 cross-color transform of one tile: four pixels per iteration within
/// each tile row, then a scalar tail that repeats the arithmetic of
/// `apply_cross_color_tile_impl_scalar` for the remaining pixels of the row.
#[cfg(target_arch = "x86_64")]
#[rite]
fn apply_cross_color_tile_sse2(
_token: X64V1Token,
pixels: &mut [u32],
width: usize,
start_x: usize,
start_y: usize,
end_x: usize,
end_y: usize,
m: &CrossColorMultipliers,
) {
// Multipliers pre-shifted so that `_mm_mulhi_epi16` against a channel placed
// in the high byte of a 16-bit lane yields `(mult * channel) >> 5`.
let g2r_cst = cst_5b(m.green_to_red);
let g2b_cst = cst_5b(m.green_to_blue);
let r2b_cst = cst_5b(m.red_to_blue);
// Per pixel: high 16-bit lane = green_to_red, low lane = green_to_blue.
let mults_rb_val = ((g2r_cst as u16 as u32) << 16) | (g2b_cst as u16 as u32);
let mults_rb = _mm_set1_epi32(mults_rb_val as i32);
// Per pixel: high 16-bit lane = red_to_blue, low lane = 0.
let mults_b2_val = (r2b_cst as u16 as u32) << 16;
let mults_b2 = _mm_set1_epi32(mults_b2_val as i32);
let mask_ag = _mm_set1_epi32(0xff00ff00u32 as i32);
let mask_rb = _mm_set1_epi32(0x00ff00ff_i32);
let tile_width = end_x - start_x;
for y in start_y..end_y {
let row_start = y * width + start_x;
let row = &mut pixels[row_start..row_start + tile_width];
let mut x = 0;
while x + 4 <= tile_width {
let chunk = row[x..].first_chunk_mut::<4>().unwrap();
let inp = simd_mem::_mm_loadu_si128(&*chunk);
// Keep alpha and green only (each in the high byte of its 16-bit lane).
let a_val = _mm_and_si128(inp, mask_ag);
// Copy the green lane over the alpha lane of every pixel.
let b_val = _mm_shufflelo_epi16(a_val, 0xA0); let c_val = _mm_shufflehi_epi16(b_val, 0xA0);
// Red delta (high lane) and green-derived blue delta (low lane).
let d_val = _mm_mulhi_epi16(c_val, mults_rb);
// Move red and blue into the high byte of their lanes.
let e_val = _mm_slli_epi16(inp, 8);
// Red-derived blue delta, produced in the high lane...
let f_val = _mm_mulhi_epi16(e_val, mults_b2);
// ...and moved down into the blue (low) lane.
let g_val = _mm_srli_epi32(f_val, 16);
let h_val = _mm_add_epi8(g_val, d_val);
// Keep only the low byte of each lane: the red and blue deltas.
let i_val = _mm_and_si128(h_val, mask_rb);
let out = _mm_sub_epi8(inp, i_val);
simd_mem::_mm_storeu_si128(chunk, out);
x += 4;
}
// Scalar tail for the last `tile_width % 4` pixels of the row.
let g2r = m.green_to_red as i8;
let g2b = m.green_to_blue as i8;
let r2b = m.red_to_blue as i8;
while x < tile_width {
let argb = row[x];
let green = (argb >> 8) as u8 as i8;
let red = (argb >> 16) as u8 as i8;
let mut new_red = (red as i32) & 0xff;
let mut new_blue = argb as i32 & 0xff;
new_red -= color_transform_delta(g2r, green);
new_red &= 0xff;
new_blue -= color_transform_delta(g2b, green);
new_blue -= color_transform_delta(r2b, red);
new_blue &= 0xff;
row[x] = (argb & 0xff00ff00u32) | ((new_red as u32) << 16) | (new_blue as u32);
x += 1;
}
}
}
/// Replaces `pixels` in place with residuals of a fixed predictor: rows
/// `1..height` predict from the pixel directly above, the rest of row 0
/// predicts from the left neighbour, and the first pixel predicts from opaque
/// black (`0xff000000`).
///
/// An empty `pixels` slice is left untouched when `width` or `height` is zero.
///
/// # Panics
///
/// Panics if `pixels` holds fewer than `width * height` entries, or fewer than
/// `width` when `height` is zero.
pub fn apply_simple_predictor(pixels: &mut [u32], width: usize, height: usize) {
    // Walk backwards so each predictor still holds its original value when read.
    for y in (1..height).rev() {
        for x in (0..width).rev() {
            let idx = y * width + x;
            let top = pixels[idx - width];
            pixels[idx] = residual(pixels[idx], top);
        }
    }
    for x in (1..width).rev() {
        let left = pixels[x - 1];
        pixels[x] = residual(pixels[x], left);
    }
    // Guarded: an empty image has no first pixel to transform.
    if let Some(first) = pixels.first_mut() {
        *first = residual(*first, 0xff000000);
    }
}
/// Color-indexing (palette) transform.
pub struct ColorIndexTransform {
    /// Palette colours; a pixel is replaced by the position of its colour in
    /// this list.
    pub palette: Vec<u32>,
}
/// Returns the log2 of how many palette indices are packed into one pixel for
/// a palette of `palette_size` colours: 3 for up to 2 colours, 2 for up to 4,
/// 1 for up to 16, and 0 (no packing) otherwise.
pub fn palette_xbits(palette_size: usize) -> u8 {
    match palette_size {
        0..=2 => 3,
        3..=4 => 2,
        5..=16 => 1,
        _ => 0,
    }
}
/// Distance between two palette colours: the sum of the wrapped per-channel
/// distances of red, green and blue, weighted by 9, plus the wrapped distance
/// of alpha.
fn palette_color_distance(col1: u32, col2: u32) -> u32 {
    /// Distance of a wrapped 8-bit difference from zero, going either way.
    fn wrapped_distance(delta: u8) -> u32 {
        let delta = u32::from(delta);
        if delta <= 128 {
            delta
        } else {
            256 - delta
        }
    }

    let [alpha, red, green, blue] = sub_pixels_color(col1, col2).to_be_bytes();
    let rgb_score = wrapped_distance(blue) + wrapped_distance(green) + wrapped_distance(red);
    rgb_score * 9 + wrapped_distance(alpha)
}
/// Per-channel wrapping subtraction `a - b` of two packed 4x8-bit colours.
fn sub_pixels_color(a: u32, b: u32) -> u32 {
    let lhs = a.to_be_bytes();
    let rhs = b.to_be_bytes();
    let mut diff = [0u8; 4];
    for (out, (&l, &r)) in diff.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
        *out = l.wrapping_sub(r);
    }
    u32::from_be_bytes(diff)
}
/// Reorders a palette greedily so that consecutive entries are close to each
/// other (by `palette_color_distance`), starting from a prediction of 0.
///
/// The input is returned unchanged when `palette_has_non_monotonous_deltas`
/// reports nothing to gain. For palettes with more than 17 colours whose first
/// entry is 0, that entry is parked at the end and left out of the greedy
/// ordering. Previously it was still included, so it was pulled straight back
/// to index 0 (distance 0 from the initial prediction) and the swap had no
/// effect.
fn palette_sort_minimize_deltas(palette_sorted: &[u32]) -> Vec<u32> {
    let mut palette: Vec<u32> = palette_sorted.to_vec();
    if !palette_has_non_monotonous_deltas(&palette) {
        return palette;
    }

    // Number of leading entries that take part in the greedy ordering.
    let mut sort_len = palette.len();
    if sort_len > 17 && palette[0] == 0 {
        sort_len -= 1;
        palette.swap(0, sort_len);
    }

    let mut predict = 0u32;
    for i in 0..sort_len {
        // Pick the remaining colour closest to the previous one; the strict
        // comparison keeps the earliest candidate on ties.
        let mut best_ix = i;
        let mut best_score = u32::MAX;
        for (k, &candidate) in palette.iter().enumerate().take(sort_len).skip(i) {
            let cur_score = palette_color_distance(candidate, predict);
            if best_score > cur_score {
                best_score = cur_score;
                best_ix = k;
            }
        }
        palette.swap(best_ix, i);
        predict = palette[i];
    }
    palette
}
/// Returns `true` when, walking the palette from a prediction of 0, at least
/// one of the red, green or blue channels shows both a positive and a negative
/// (wrapped, i.e. `>= 0x80`) non-zero delta between consecutive colours.
fn palette_has_non_monotonous_deltas(palette: &[u32]) -> bool {
    // Per channel, the "positive delta seen" flag; the "negative" flag is the
    // next higher bit.
    const RED_POSITIVE: u8 = 1;
    const GREEN_POSITIVE: u8 = 8;
    const BLUE_POSITIVE: u8 = 64;

    let mut signs_seen = 0u8;
    let mut previous = 0u32;
    for &color in palette {
        let [_, red_delta, green_delta, blue_delta] =
            sub_pixels_color(color, previous).to_be_bytes();
        let channels = [
            (red_delta, RED_POSITIVE),
            (green_delta, GREEN_POSITIVE),
            (blue_delta, BLUE_POSITIVE),
        ];
        for (delta, positive_flag) in channels {
            if delta != 0 {
                signs_seen |= if delta < 0x80 {
                    positive_flag
                } else {
                    positive_flag << 1
                };
            }
        }
        previous = color;
    }
    // A positive flag lines up with its negative flag after a one-bit shift.
    (signs_seen & (signs_seen << 1)) != 0
}
/// Packs palette indices (read from the green channel of `pixels`) so that
/// `1 << xbits` horizontally adjacent indices share one output pixel.
///
/// Each output pixel is `0xff000000` with the packed indices OR-ed into the
/// bits starting at bit 8, `8 >> xbits` bits apart. Every image row is packed
/// independently; a trailing partial group still produces one output pixel.
/// With `xbits == 0` every pixel is emitted as `0xff000000 | (index << 8)`.
///
/// `pixels.len() / width` full rows are processed. A `width` of zero (with
/// `xbits > 0`) yields an empty vector instead of dividing by zero.
///
/// # Panics
///
/// Panics (in builds with overflow checks) if `xbits > 3`.
pub fn bundle_color_map(pixels: &[u32], width: usize, xbits: u8) -> Vec<u32> {
    if xbits == 0 {
        return pixels
            .iter()
            .map(|&p| 0xff000000 | ((argb_green(p) as u32) << 8))
            .collect();
    }
    if width == 0 {
        // No rows can be formed; avoid the division by zero below.
        return Vec::new();
    }

    let bit_depth = 1u32 << (3 - xbits);
    let indices_per_pixel = 1usize << xbits;
    let packed_width = subsample_size(width as u32, xbits) as usize;
    let height = pixels.len() / width;

    let mut dst = Vec::with_capacity(packed_width * height);
    for row in pixels.chunks_exact(width) {
        for group in row.chunks(indices_per_pixel) {
            let mut code = 0xff000000u32;
            for (slot, &p) in group.iter().enumerate() {
                let idx = argb_green(p) as u32;
                code |= idx << (8 + bit_depth * slot as u32);
            }
            dst.push(code);
        }
    }
    dst
}
impl ColorIndexTransform {
    /// Builds a palette from the distinct colours in `pixels`, reordered to
    /// minimise deltas. Returns `None` when there are more than 256 colours.
    pub fn try_build(pixels: &[u32]) -> Option<Self> {
        Self::try_build_with_sorting(pixels, true)
    }

    /// Builds a palette from the distinct colours in `pixels`.
    ///
    /// The colours are collected in ascending numeric order; with
    /// `minimize_delta` they are then reordered by
    /// `palette_sort_minimize_deltas`. Returns `None` as soon as a 257th
    /// distinct colour is seen.
    pub fn try_build_with_sorting(pixels: &[u32], minimize_delta: bool) -> Option<Self> {
        let mut distinct = alloc::collections::BTreeSet::new();
        for &color in pixels {
            // The set only grows on a new colour, so that is the only time
            // the limit can be crossed.
            if distinct.insert(color) && distinct.len() > 256 {
                return None;
            }
        }
        let ascending: Vec<u32> = distinct.into_iter().collect();
        let palette = if minimize_delta {
            palette_sort_minimize_deltas(&ascending)
        } else {
            ascending
        };
        Some(Self { palette })
    }

    /// Wraps an existing palette without any checks or reordering.
    pub fn from_palette(palette: Vec<u32>) -> Self {
        Self { palette }
    }

    /// Packing factor (log2) for this palette's size; see `palette_xbits`.
    pub fn xbits(&self) -> u8 {
        palette_xbits(self.palette.len())
    }

    /// Replaces every pixel with `make_argb(255, 0, index, 0)`, where `index`
    /// is the position of the pixel's colour in the palette (truncated to
    /// `u8`; for duplicate colours the last position wins).
    ///
    /// # Panics
    ///
    /// Panics if a pixel's colour is not in the palette.
    pub fn apply(&self, pixels: &mut [u32]) {
        let mut index_of = alloc::collections::BTreeMap::new();
        self.palette
            .iter()
            .enumerate()
            .for_each(|(position, &color)| {
                index_of.insert(color, position as u8);
            });
        for slot in pixels.iter_mut() {
            let index = index_of[&*slot];
            *slot = make_argb(255, 0, index, 0);
        }
    }

    /// Runs [`Self::apply`] on `pixels`, then packs the indices according to
    /// [`Self::xbits`]. Returns the packed pixels and the packed row width;
    /// when no packing applies these are a copy of `pixels` and `width`.
    pub fn apply_and_bundle(&self, pixels: &mut [u32], width: usize) -> (Vec<u32>, usize) {
        self.apply(pixels);
        match self.xbits() {
            0 => (pixels.to_vec(), width),
            xbits => {
                let packed_width = subsample_size(width as u32, xbits) as usize;
                (bundle_color_map(pixels, width, xbits), packed_width)
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Red and blue lose the green value; alpha and green stay as they were.
    #[test]
    fn test_subtract_green() {
        let mut pixels = vec![make_argb(255, 100, 50, 150)];
        apply_subtract_green(&mut pixels);
        let p = pixels[0];
        assert_eq!(argb_alpha(p), 255);
        assert_eq!(argb_red(p), 50); // 100 - 50
        assert_eq!(argb_green(p), 50); // unchanged
        assert_eq!(argb_blue(p), 100); // 150 - 50
    }

    /// Every channel of the average is the mean of the two inputs.
    #[test]
    fn test_average2() {
        let avg = average2(
            make_argb(100, 100, 100, 100),
            make_argb(200, 200, 200, 200),
        );
        assert_eq!(argb_alpha(avg), 150);
        assert_eq!(argb_red(avg), 150);
        assert_eq!(argb_green(avg), 150);
        assert_eq!(argb_blue(avg), 150);
    }

    /// Residuals are per-channel wrapping differences.
    #[test]
    fn test_residual() {
        let pixel = make_argb(100, 50, 80, 200);
        let pred = make_argb(90, 60, 70, 150);
        let res = residual(pixel, pred);
        assert_eq!(argb_alpha(res), 10); // 100 - 90
        assert_eq!(argb_red(res), 246); // 50 - 60 wraps around
        assert_eq!(argb_green(res), 10); // 80 - 70
        assert_eq!(argb_blue(res), 50); // 200 - 150
    }

    /// Two distinct colours give a two-entry palette packed eight per pixel.
    #[test]
    fn test_color_index_small_palette() {
        let red = make_argb(255, 255, 0, 0);
        let green = make_argb(255, 0, 255, 0);
        let pixels = vec![red, green, red];
        let transform = ColorIndexTransform::try_build(&pixels).unwrap();
        assert_eq!(transform.palette.len(), 2);
        assert_eq!(transform.xbits(), 3);
    }

    /// 257 distinct colours cannot be palettised.
    #[test]
    fn test_color_index_too_many_colors() {
        let pixels: Vec<u32> = (0..257)
            .map(|i| make_argb(255, (i % 256) as u8, (i / 256) as u8, 0))
            .collect();
        let transform = ColorIndexTransform::try_build(&pixels);
        assert!(transform.is_none());
    }

    /// With xbits = 3, sixteen indices pack into two output pixels.
    #[test]
    fn test_palette_bundle_2_colors() {
        let indices: Vec<u32> = (0..16).map(|i| make_argb(255, 0, i & 1, 0)).collect();
        let bundled = bundle_color_map(&indices, 16, 3);
        assert_eq!(bundled.len(), 2);
        let first = bundled[0];
        assert_eq!(first & 0xff000000, 0xff000000); // alpha stays opaque
    }

    /// With xbits = 1, eight indices pack into four output pixels.
    #[test]
    fn test_palette_bundle_16_colors() {
        let indices: Vec<u32> = (0..8)
            .map(|i| make_argb(255, 0, (i * 3) & 0xf, 0))
            .collect();
        let bundled = bundle_color_map(&indices, 8, 1);
        assert_eq!(bundled.len(), 4);
    }

    /// Reordering keeps every entry and starts with the colour closest to 0.
    #[test]
    fn test_palette_sort_minimize_deltas() {
        let sorted = vec![
            make_argb(255, 0, 0, 0),
            make_argb(255, 128, 128, 128),
            make_argb(255, 255, 0, 0),
            make_argb(255, 255, 255, 255),
        ];
        let reordered = palette_sort_minimize_deltas(&sorted);
        assert_eq!(reordered[0], make_argb(255, 0, 0, 0));
        assert_eq!(reordered.len(), 4);
    }
}