// Reversible delta filters over buffers of little-endian f32/f64 bit
// patterns: a byte-wise "sequence" delta, a per-row delta, and a combined
// row+column ("cross") delta. Every `apply_*` function has a matching
// `restore_*` that inverts it exactly.

/// Field-wise wrapping subtraction for an f32 bit pattern: the 23-bit
/// mantissa and the 9-bit sign+exponent field are subtracted independently,
/// each modulo its own width, so no borrow crosses the field boundary.
/// Exact inverse of `add32_bit_flt`.
fn sub32_bit_flt(a: u32, b: u32) -> u32 {
const FLT_MANT_MASK: u32 = 0x007F_FFFF;
const FLT_9BIT_MASK: u32 = 0xFF80_0000;
let ret = (a.wrapping_sub(b)) & FLT_MANT_MASK;
let ae = ((a & FLT_9BIT_MASK) >> 23) & 0x1FF;
let be = ((b & FLT_9BIT_MASK) >> 23) & 0x1FF;
ret | (((ae.wrapping_sub(be)) & 0x1FF) << 23)
}
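/// Field-wise wrapping addition for an f32 bit pattern; exact inverse of
/// `sub32_bit_flt`.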
fn add32_bit_flt(a: u32, b: u32) -> u32 {
const FLT_MANT_MASK: u32 = 0x007F_FFFF;
const FLT_9BIT_MASK: u32 = 0xFF80_0000;
let ret = (a.wrapping_add(b)) & FLT_MANT_MASK;
let ae = ((a & FLT_9BIT_MASK) >> 23) & 0x1FF;
let be = ((b & FLT_9BIT_MASK) >> 23) & 0x1FF;
ret | (((ae.wrapping_add(be)) & 0x1FF) << 23)
}
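/// Field-wise wrapping subtraction for an f64 bit pattern: the 52-bit
/// mantissa and the 12-bit sign+exponent field wrap independently.
/// Exact inverse of `add64_bit_dbl`.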
fn sub64_bit_dbl(a: u64, b: u64) -> u64 {
const DBL_MANT_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
const DBL_12BIT_MASK: u64 = 0xFFF0_0000_0000_0000;
let ret = (a.wrapping_sub(b)) & DBL_MANT_MASK;
let ae = ((a & DBL_12BIT_MASK) >> 52) & 0xFFF;
let be = ((b & DBL_12BIT_MASK) >> 52) & 0xFFF;
ret | (((ae.wrapping_sub(be)) & 0xFFF) << 52)
}
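/// Field-wise wrapping addition for an f64 bit pattern; exact inverse of
/// `sub64_bit_dbl`.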
fn add64_bit_dbl(a: u64, b: u64) -> u64 {
const DBL_MANT_MASK: u64 = 0x000F_FFFF_FFFF_FFFF;
const DBL_12BIT_MASK: u64 = 0xFFF0_0000_0000_0000;
let ret = (a.wrapping_add(b)) & DBL_MANT_MASK;
let ae = ((a & DBL_12BIT_MASK) >> 52) & 0xFFF;
let be = ((b & DBL_12BIT_MASK) >> 52) & 0xFFF;
ret | (((ae.wrapping_add(be)) & 0xFFF) << 52)
}
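// Little-endian load/store helpers; they panic if `offset + 4` (or
// `offset + 8`) runs past the end of `data`, so callers are expected to
// keep offsets in bounds.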
#[inline]
fn read_u32_le(data: &[u8], offset: usize) -> u32 {
u32::from_le_bytes(data[offset..offset + 4].try_into().unwrap())
}
#[inline]
fn write_u32_le(data: &mut [u8], offset: usize, val: u32) {
data[offset..offset + 4].copy_from_slice(&val.to_le_bytes());
}
#[inline]
fn read_u64_le(data: &[u8], offset: usize) -> u64 {
u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap())
}
#[inline]
fn write_u64_le(data: &mut [u8], offset: usize, val: u64) {
data[offset..offset + 8].copy_from_slice(&val.to_le_bytes());
}
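/// Inverse of `apply_sequence`: re-runs the prefix-sum passes in the
/// reverse order of the differencing passes (pass `l` skips the first `l`
/// bytes). `_width` is unused here.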
pub(super) fn restore_sequence(data: &mut [u8], _width: usize, level: u8) {
if data.is_empty() || level == 0 {
return;
}
let size = data.len();
for l in (1..=level as usize).rev() {
for i in l..size {
data[i] = data[i].wrapping_add(data[i - 1]);
}
}
}
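/// Inverse of `apply_cross_bytes`: re-accumulates the vertical (column)
/// deltas top-down, then the horizontal (row) deltas left-to-right, i.e.
/// the forward passes undone in reverse order.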
pub(super) fn restore_cross_bytes(data: &mut [u8], width: usize, height: usize, unit_size: usize) {
match unit_size {
4 => restore_cross_bytes_f32(data, width, height),
8 => restore_cross_bytes_f64(data, width, height),
_ => unreachable!("unexpected unit_size {unit_size}"),
}
}
fn restore_cross_bytes_f32(data: &mut [u8], width: usize, height: usize) {
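    // Pass 1: undo the vertical deltas from the top row down, so each
    // addend has already been restored to its original value.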
for col in 0..width {
        let mut offset = width;
        for _row in 1..height {
            let prev_off = (offset - width + col) * 4;
            let cur_off = (offset + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, add32_bit_flt(cur, prev));
offset += width;
}
}
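    // Pass 2: undo the horizontal deltas left to right.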
for row in 0..height {
for col in 1..width {
let prev_off = (row * width + col - 1) * 4;
let cur_off = (row * width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, add32_bit_flt(cur, prev));
}
}
}
fn restore_cross_bytes_f64(data: &mut [u8], width: usize, height: usize) {
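    // Same two passes as the f32 variant, on 8-byte units.
    // Pass 1: undo the vertical deltas from the top row down.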
for col in 0..width {
let mut offset = width;
for _row in 1..height {
            let prev_off = (offset - width + col) * 8;
            let cur_off = (offset + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, add64_bit_dbl(cur, prev));
offset += width;
}
}
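    // Pass 2: undo the horizontal deltas left to right.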
for row in 0..height {
for col in 1..width {
let prev_off = (row * width + col - 1) * 8;
let cur_off = (row * width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, add64_bit_dbl(cur, prev));
}
}
}
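/// Inverse of `apply_byte_order`: re-accumulates the per-row horizontal
/// deltas left-to-right.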
pub(super) fn restore_byte_order(data: &mut [u8], width: usize, height: usize, unit_size: usize) {
match unit_size {
4 => restore_byte_order_f32(data, width, height),
8 => restore_byte_order_f64(data, width, height),
_ => unreachable!("unexpected unit_size {unit_size}"),
}
}
fn restore_byte_order_f32(data: &mut [u8], width: usize, height: usize) {
for row in 0..height {
for col in 1..width {
let prev_off = (row * width + col - 1) * 4;
let cur_off = (row * width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, add32_bit_flt(cur, prev));
}
}
}
fn restore_byte_order_f64(data: &mut [u8], width: usize, height: usize) {
for row in 0..height {
for col in 1..width {
let prev_off = (row * width + col - 1) * 8;
let cur_off = (row * width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, add64_bit_dbl(cur, prev));
}
}
}
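/// Byte-wise delta filter applied `level` times. Each pass walks right to
/// left, so every difference is taken against a byte the current pass has
/// not yet modified. `_width` is unused here.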
pub(super) fn apply_sequence(data: &mut [u8], _width: usize, level: u8) {
if data.is_empty() || level == 0 {
return;
}
let size = data.len();
for l in 1..=level as usize {
for i in (l..size).rev() {
data[i] = data[i].wrapping_sub(data[i - 1]);
}
}
}
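/// Forward cross-delta filter: per row, each unit becomes its field-wise
/// difference from the left neighbor (right-to-left, so the neighbor is
/// still unfiltered by the current pass); then per column, its difference
/// from the unit above (bottom-up, for the same reason).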
pub(super) fn apply_cross_bytes(data: &mut [u8], width: usize, height: usize, unit_size: usize) {
match unit_size {
4 => apply_cross_bytes_f32(data, width, height),
8 => apply_cross_bytes_f64(data, width, height),
_ => unreachable!("unexpected unit_size {unit_size}"),
}
}
fn apply_cross_bytes_f32(data: &mut [u8], width: usize, height: usize) {
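    // Pass 1: horizontal deltas, right to left within each row.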
for row in 0..height {
for col in (1..width).rev() {
let prev_off = (row * width + col - 1) * 4;
let cur_off = (row * width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, sub32_bit_flt(cur, prev));
}
}
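    // Pass 2: vertical deltas, bottom row up within each column.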
for col in 0..width {
let mut offset = (height - 1) * width;
for _row in (1..height).rev() {
let cur_off = (offset + col) * 4;
let prev_off = (offset - width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, sub32_bit_flt(cur, prev));
offset -= width;
}
}
}
fn apply_cross_bytes_f64(data: &mut [u8], width: usize, height: usize) {
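    // Pass 1: horizontal deltas, right to left (8-byte units).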
for row in 0..height {
for col in (1..width).rev() {
let prev_off = (row * width + col - 1) * 8;
let cur_off = (row * width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, sub64_bit_dbl(cur, prev));
}
}
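    // Pass 2: vertical deltas, bottom row up.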
for col in 0..width {
let mut offset = (height - 1) * width;
for _row in (1..height).rev() {
let cur_off = (offset + col) * 8;
let prev_off = (offset - width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, sub64_bit_dbl(cur, prev));
offset -= width;
}
}
}
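/// Vertical pass of the cross filter on its own: column deltas against the
/// row above, taken bottom-up, leaving row 0 untouched.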
pub(super) fn apply_cross_bytes_phase2(
data: &mut [u8],
width: usize,
height: usize,
unit_size: usize,
) {
match unit_size {
4 => apply_cross_bytes_phase2_f32(data, width, height),
8 => apply_cross_bytes_phase2_f64(data, width, height),
_ => unreachable!("unexpected unit_size {unit_size}"),
}
}
fn apply_cross_bytes_phase2_f32(data: &mut [u8], width: usize, height: usize) {
for col in 0..width {
let mut offset = (height - 1) * width;
for _row in (1..height).rev() {
let cur_off = (offset + col) * 4;
let prev_off = (offset - width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, sub32_bit_flt(cur, prev));
offset -= width;
}
}
}
fn apply_cross_bytes_phase2_f64(data: &mut [u8], width: usize, height: usize) {
for col in 0..width {
let mut offset = (height - 1) * width;
for _row in (1..height).rev() {
let cur_off = (offset + col) * 8;
let prev_off = (offset - width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, sub64_bit_dbl(cur, prev));
offset -= width;
}
}
}
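/// Per-row horizontal delta filter; `restore_byte_order` is its exact
/// inverse.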
pub(super) fn apply_byte_order(data: &mut [u8], width: usize, height: usize, unit_size: usize) {
match unit_size {
4 => apply_byte_order_f32(data, width, height),
8 => apply_byte_order_f64(data, width, height),
_ => unreachable!("unexpected unit_size {unit_size}"),
}
}
fn apply_byte_order_f32(data: &mut [u8], width: usize, height: usize) {
for row in 0..height {
for col in (1..width).rev() {
let prev_off = (row * width + col - 1) * 4;
let cur_off = (row * width + col) * 4;
let prev = read_u32_le(data, prev_off);
let cur = read_u32_le(data, cur_off);
write_u32_le(data, cur_off, sub32_bit_flt(cur, prev));
}
}
}
fn apply_byte_order_f64(data: &mut [u8], width: usize, height: usize) {
for row in 0..height {
for col in (1..width).rev() {
let prev_off = (row * width + col - 1) * 8;
let cur_off = (row * width + col) * 8;
let prev = read_u64_le(data, prev_off);
let cur = read_u64_le(data, cur_off);
write_u64_le(data, cur_off, sub64_bit_dbl(cur, prev));
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sub_add_32_bit_flt_known_values() {
assert_eq!(sub32_bit_flt(0x3F80_0000, 0x3F80_0000), 0);
assert_eq!(sub32_bit_flt(0x3F80_0000, 0), 0x3F80_0000);
assert_eq!(add32_bit_flt(0, 0x3F80_0000), 0x3F80_0000);
}
#[test]
fn sub_add_64_bit_dbl_known_values() {
let one_f64 = 1.0f64.to_bits();
assert_eq!(sub64_bit_dbl(one_f64, one_f64), 0);
assert_eq!(sub64_bit_dbl(one_f64, 0), one_f64);
assert_eq!(add64_bit_dbl(0, one_f64), one_f64);
}
#[test]
fn apply_sequence_level_zero_is_noop() {
let original = vec![1u8, 2, 3, 4, 5];
let mut data = original.clone();
apply_sequence(&mut data, 5, 0);
assert_eq!(data, original);
}
#[test]
fn apply_sequence_empty_is_noop() {
let mut data: Vec<u8> = vec![];
apply_sequence(&mut data, 0, 3);
assert!(data.is_empty());
}
#[test]
fn apply_sequence_level1_known() {
let mut data = vec![10u8, 20, 35, 55];
apply_sequence(&mut data, 4, 1);
assert_eq!(data, vec![10, 10, 15, 20]);
}
#[test]
fn apply_restore_cross_bytes_f32_round_trip() {
let width = 4;
let height = 4;
let floats: Vec<f32> = (0..16).map(|i| i as f32 * 1.5 + 0.25).collect();
let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
let original = data.clone();
apply_cross_bytes(&mut data, width, height, 4);
assert_ne!(data, original, "forward transform should modify data");
restore_cross_bytes(&mut data, width, height, 4);
assert_eq!(data, original, "round-trip should recover original");
}
#[test]
fn apply_restore_cross_bytes_f64_round_trip() {
let width = 3;
let height = 3;
let doubles: Vec<f64> = (0..9).map(|i| i as f64 * 2.5 - 1.0).collect();
let mut data: Vec<u8> = doubles.iter().flat_map(|d| d.to_le_bytes()).collect();
let original = data.clone();
apply_cross_bytes(&mut data, width, height, 8);
assert_ne!(data, original, "forward transform should modify data");
restore_cross_bytes(&mut data, width, height, 8);
assert_eq!(data, original, "round-trip should recover original");
}
#[test]
fn apply_restore_byte_order_f32_round_trip() {
let width = 4;
let height = 4;
let floats: Vec<f32> = (0..16).map(|i| (i as f32).sin() * 100.0).collect();
let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
let original = data.clone();
apply_byte_order(&mut data, width, height, 4);
assert_ne!(data, original, "forward transform should modify data");
restore_byte_order(&mut data, width, height, 4);
assert_eq!(data, original, "round-trip should recover original");
}
#[test]
fn apply_restore_byte_order_f64_round_trip() {
let width = 3;
let height = 3;
let doubles: Vec<f64> = (0..9).map(|i| (i as f64).cos() * 200.0).collect();
let mut data: Vec<u8> = doubles.iter().flat_map(|d| d.to_le_bytes()).collect();
let original = data.clone();
apply_byte_order(&mut data, width, height, 8);
assert_ne!(data, original, "forward transform should modify data");
restore_byte_order(&mut data, width, height, 8);
assert_eq!(data, original, "round-trip should recover original");
}
#[test]
fn apply_cross_bytes_phase2_only_column_deltas_f32() {
let width = 3;
let height = 3;
let floats: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
apply_cross_bytes_phase2(&mut data, width, height, 4);
let result: Vec<u32> = (0..9).map(|i| read_u32_le(&data, i * 4)).collect();
let row0_original: Vec<u32> = [1.0f32, 2.0, 3.0].iter().map(|f| f.to_bits()).collect();
assert_eq!(&result[0..3], &row0_original[..]);
for col in 0..width {
let r1_val = result[width + col];
let expected_r1 = sub32_bit_flt(floats[width + col].to_bits(), floats[col].to_bits());
assert_eq!(r1_val, expected_r1, "row1 col{col} should be col delta");
let r2_val = result[2 * width + col];
let expected_r2 = sub32_bit_flt(
floats[2 * width + col].to_bits(),
floats[width + col].to_bits(),
);
assert_eq!(r2_val, expected_r2, "row2 col{col} should be col delta");
}
}
#[test]
fn apply_cross_bytes_phase2_only_column_deltas_f64() {
let width = 2;
let height = 3;
let doubles: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let mut data: Vec<u8> = doubles.iter().flat_map(|d| d.to_le_bytes()).collect();
apply_cross_bytes_phase2(&mut data, width, height, 8);
let result: Vec<u64> = (0..6).map(|i| read_u64_le(&data, i * 8)).collect();
for col in 0..width {
assert_eq!(result[col], doubles[col].to_bits());
}
for col in 0..width {
let expected = sub64_bit_dbl(doubles[width + col].to_bits(), doubles[col].to_bits());
assert_eq!(result[width + col], expected);
}
}
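    // A hedged sketch of how these filters might compose in a pipeline; the
    // real call order lives in the parent module and is not shown here, so
    // this only checks that a stacked apply/restore pair still round-trips
    // when undone in reverse order.
    #[test]
    fn stacked_filters_round_trip_sketch() {
        let width = 4;
        let height = 4;
        let floats: Vec<f32> = (0..16).map(|i| i as f32 * 0.75 - 3.0).collect();
        let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
        let original = data.clone();
        apply_cross_bytes(&mut data, width, height, 4);
        apply_sequence(&mut data, width, 2);
        restore_sequence(&mut data, width, 2);
        restore_cross_bytes(&mut data, width, height, 4);
        assert_eq!(data, original, "stacked filters should invert in reverse order");
    }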
#[cfg(not(target_arch = "wasm32"))]
mod proptest_tests {
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
fn sub_add_32_bit_flt_round_trip(a: u32, b: u32) {
let diff = sub32_bit_flt(a, b);
let recovered = add32_bit_flt(diff, b);
prop_assert_eq!(recovered, a);
}
#[test]
fn sub_add_64_bit_dbl_round_trip(a: u64, b: u64) {
let diff = sub64_bit_dbl(a, b);
let recovered = add64_bit_dbl(diff, b);
prop_assert_eq!(recovered, a);
}
}
proptest! {
#[test]
fn apply_restore_sequence_round_trip(level in 1..5u8, len in 10..200usize) {
let original: Vec<u8> = (0..len).map(|i| (i * 7 + 13) as u8).collect();
let mut data = original.clone();
apply_sequence(&mut data, len, level);
restore_sequence(&mut data, len, level);
prop_assert_eq!(data, original);
}
}
proptest! {
#[test]
fn apply_restore_cross_bytes_f32_prop(
width in 2..8usize,
height in 2..8usize,
seed in 0u32..1000,
) {
let n = width * height;
let floats: Vec<f32> = (0..n).map(|i| ((i as u32).wrapping_mul(seed.wrapping_add(1))) as f32).collect();
let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
let original = data.clone();
apply_cross_bytes(&mut data, width, height, 4);
restore_cross_bytes(&mut data, width, height, 4);
prop_assert_eq!(data, original);
}
#[test]
fn apply_restore_byte_order_f32_prop(
width in 2..8usize,
height in 2..8usize,
seed in 0u32..1000,
) {
let n = width * height;
let floats: Vec<f32> = (0..n).map(|i| ((i as u32).wrapping_mul(seed.wrapping_add(1))) as f32).collect();
let mut data: Vec<u8> = floats.iter().flat_map(|f| f.to_le_bytes()).collect();
let original = data.clone();
apply_byte_order(&mut data, width, height, 4);
restore_byte_order(&mut data, width, height, 4);
prop_assert_eq!(data, original);
}
}
}
}