#![cfg_attr(target_arch = "x86", allow(unused_imports))]
use archmage::prelude::*;
use crate::SizeError;
#[inline(always)]
/// Expands a 2-bit alpha value into the full 10-bit range by bit
/// replication: `0b01` -> `0b01_0101_0101` (341), `0b11` -> 1023.
/// Inputs wider than 2 bits are masked down to their low 2 bits first.
fn expand2_to_10(a: u32) -> u16 {
    let two_bits = a & 0b11;
    // Multiplying by 0b01_0101_0101 broadcasts the 2-bit pattern across
    // all five 2-bit groups of the 10-bit result.
    ((two_bits * 0b01_0101_0101) & 0x3FF) as u16
}
#[inline(always)]
/// Compresses a 10-bit alpha value to 2 bits with round-to-nearest
/// scaling: `round(a * 3 / 1023)` computed in integer arithmetic.
/// Inputs wider than 10 bits are masked down first.
fn compress_10_to_2(a10: u16) -> u32 {
    let a = u32::from(a10) & 0x3FF;
    // Adding 511 (~half of 1023) before dividing rounds to the nearest
    // of the four representable 2-bit levels.
    (a * 3 + 511) / 1023
}
#[inline(always)]
/// Unpacks one little-endian RGBA1010102 pixel into four 16-bit channels.
/// Bit layout of the packed word: R in bits 0..10, G in 10..20,
/// B in 20..30, and the 2-bit A in 30..32 (expanded to 10 bits on output).
fn unpack_one_to_rgba16(src: &[u8; 4], dst: &mut [u16; 4]) {
    let word = u32::from_le_bytes(*src);
    // The three 10-bit color channels sit at shifts 0, 10, and 20.
    for (i, channel) in dst.iter_mut().take(3).enumerate() {
        *channel = ((word >> (10 * i)) & 0x3FF) as u16;
    }
    dst[3] = expand2_to_10(word >> 30);
}
#[autoversion(v3, neon, wasm128)]
/// Contiguous unpack kernel: one packed 4-byte pixel per 4-channel group.
/// `chunks_exact` guarantees full groups so LLVM can drop bounds checks.
fn rgba1010102_to_rgba16_impl(src: &[u8], dst: &mut [u16]) {
    let pairs = src.chunks_exact(4).zip(dst.chunks_exact_mut(4));
    for (packed, channels) in pairs {
        let packed: &[u8; 4] = packed.try_into().unwrap();
        let channels: &mut [u16; 4] = channels.try_into().unwrap();
        unpack_one_to_rgba16(packed, channels);
    }
}
#[autoversion(v3, neon, wasm128)]
/// Strided unpack kernel. `ss` is the source stride in bytes, `ds` the
/// destination stride in u16 elements; each row converts `w` pixels and
/// leaves any stride padding untouched.
fn rgba1010102_to_rgba16_strided_impl(
    src: &[u8],
    dst: &mut [u16],
    w: usize,
    h: usize,
    ss: usize,
    ds: usize,
) {
    for y in 0..h {
        // Slice out exactly one row of pixels on each side.
        let src_row = &src[y * ss..][..w * 4];
        let dst_row = &mut dst[y * ds..][..w * 4];
        for (packed, channels) in src_row.chunks_exact(4).zip(dst_row.chunks_exact_mut(4)) {
            let packed: &[u8; 4] = packed.try_into().unwrap();
            let channels: &mut [u16; 4] = channels.try_into().unwrap();
            unpack_one_to_rgba16(packed, channels);
        }
    }
}
#[inline(always)]
/// Packs four 16-bit channels into one little-endian RGBA1010102 pixel.
/// Color channels are masked to their low 10 bits; alpha is compressed
/// from 10 bits to 2 with round-to-nearest.
fn pack_one_from_rgba16(src: &[u16; 4], dst: &mut [u8; 4]) {
    let [r, g, b, a] = *src;
    let mut word = u32::from(r) & 0x3FF;
    word |= (u32::from(g) & 0x3FF) << 10;
    word |= (u32::from(b) & 0x3FF) << 20;
    word |= compress_10_to_2(a) << 30;
    *dst = word.to_le_bytes();
}
#[autoversion(v3, neon, wasm128)]
/// Contiguous pack kernel: each group of four source channels becomes one
/// packed 4-byte pixel.
fn rgba16_to_rgba1010102_impl(src: &[u16], dst: &mut [u8]) {
    let pairs = src.chunks_exact(4).zip(dst.chunks_exact_mut(4));
    for (channels, packed) in pairs {
        let channels: &[u16; 4] = channels.try_into().unwrap();
        let packed: &mut [u8; 4] = packed.try_into().unwrap();
        pack_one_from_rgba16(channels, packed);
    }
}
#[autoversion(v3, neon, wasm128)]
/// Strided pack kernel. `ss` is the source stride in u16 elements, `ds`
/// the destination stride in bytes; each row converts `w` pixels and
/// leaves any stride padding untouched.
fn rgba16_to_rgba1010102_strided_impl(
    src: &[u16],
    dst: &mut [u8],
    w: usize,
    h: usize,
    ss: usize,
    ds: usize,
) {
    for y in 0..h {
        // Slice out exactly one row of pixels on each side.
        let src_row = &src[y * ss..][..w * 4];
        let dst_row = &mut dst[y * ds..][..w * 4];
        for (channels, packed) in src_row.chunks_exact(4).zip(dst_row.chunks_exact_mut(4)) {
            let channels: &[u16; 4] = channels.try_into().unwrap();
            let packed: &mut [u8; 4] = packed.try_into().unwrap();
            pack_one_from_rgba16(channels, packed);
        }
    }
}
#[inline]
/// Validates buffer sizes for unpacking.
///
/// `src_bytes` must be a positive multiple of 4 (one packed pixel is
/// 4 bytes) and `dst_u16` must hold at least 4 channels per pixel.
fn check_unpack(src_bytes: usize, dst_u16: usize) -> Result<(), SizeError> {
    if src_bytes == 0 || src_bytes % 4 != 0 {
        return Err(SizeError::NotPixelAligned);
    }
    let pixels = src_bytes / 4;
    // A larger destination is allowed; only a shortfall is an error.
    if dst_u16 >= pixels * 4 {
        Ok(())
    } else {
        Err(SizeError::PixelCountMismatch)
    }
}
#[inline]
/// Validates buffer sizes for packing.
///
/// `src_u16` must be a positive multiple of 4 (four channels per pixel)
/// and `dst_bytes` must hold at least 4 bytes per pixel.
fn check_pack(src_u16: usize, dst_bytes: usize) -> Result<(), SizeError> {
    if src_u16 == 0 || src_u16 % 4 != 0 {
        return Err(SizeError::NotPixelAligned);
    }
    let pixels = src_u16 / 4;
    // A larger destination is allowed; only a shortfall is an error.
    if dst_bytes >= pixels * 4 {
        Ok(())
    } else {
        Err(SizeError::PixelCountMismatch)
    }
}
#[inline]
/// Validates a strided buffer: `len` elements must cover `height` rows of
/// `width * bpp` elements each, laid out `stride` elements apart. All
/// quantities are in the same unit (bytes for u8 buffers, elements for
/// u16 buffers). Overflowing size computations are rejected.
fn check_strided_bytes(
    len: usize,
    width: usize,
    height: usize,
    stride: usize,
    bpp: usize,
) -> Result<(), SizeError> {
    // Degenerate images are rejected outright.
    if width == 0 || height == 0 {
        return Err(SizeError::InvalidStride);
    }
    // One row of pixel data must fit inside a single stride.
    let row = width.checked_mul(bpp).ok_or(SizeError::InvalidStride)?;
    if stride < row {
        return Err(SizeError::InvalidStride);
    }
    // Required length: (height - 1) full strides plus one final row
    // (the last row does not need trailing stride padding).
    let needed = (height - 1)
        .checked_mul(stride)
        .and_then(|n| n.checked_add(row))
        .ok_or(SizeError::InvalidStride)?;
    if len >= needed {
        Ok(())
    } else {
        Err(SizeError::InvalidStride)
    }
}
/// Unpacks contiguous RGBA1010102 pixels (`src`, 4 bytes per pixel) into
/// 16-bit channels (`dst`, 4 u16 per pixel).
///
/// # Errors
/// Returns `SizeError::NotPixelAligned` when `src` is empty or not a
/// multiple of 4 bytes, and `SizeError::PixelCountMismatch` when `dst`
/// is too small.
pub fn rgba1010102_to_rgba16(src: &[u8], dst: &mut [u16]) -> Result<(), SizeError> {
    check_unpack(src.len(), dst.len()).map(|()| rgba1010102_to_rgba16_impl(src, dst))
}
/// Packs contiguous 16-bit channels (`src`, 4 u16 per pixel) into
/// RGBA1010102 pixels (`dst`, 4 bytes per pixel).
///
/// # Errors
/// Returns `SizeError::NotPixelAligned` when `src` is empty or not a
/// multiple of 4 elements, and `SizeError::PixelCountMismatch` when
/// `dst` is too small.
pub fn rgba16_to_rgba1010102(src: &[u16], dst: &mut [u8]) -> Result<(), SizeError> {
    check_pack(src.len(), dst.len()).map(|()| rgba16_to_rgba1010102_impl(src, dst))
}
/// Strided variant of [`rgba1010102_to_rgba16`]. `src_stride` is in
/// bytes, `dst_stride` in u16 elements; stride padding in `dst` is left
/// untouched.
///
/// # Errors
/// Returns `SizeError::InvalidStride` when dimensions are zero, a stride
/// is smaller than a row, or either buffer is too short.
pub fn rgba1010102_to_rgba16_strided(
    src: &[u8],
    dst: &mut [u16],
    width: usize,
    height: usize,
    src_stride: usize,
    dst_stride: usize,
) -> Result<(), SizeError> {
    // Both buffers use 4 elements per pixel in their own units.
    check_strided_bytes(src.len(), width, height, src_stride, 4)?;
    check_strided_bytes(dst.len(), width, height, dst_stride, 4)
        .map(|()| rgba1010102_to_rgba16_strided_impl(src, dst, width, height, src_stride, dst_stride))
}
/// Strided variant of [`rgba16_to_rgba1010102`]. `src_stride` is in u16
/// elements, `dst_stride` in bytes; stride padding in `dst` is left
/// untouched.
///
/// # Errors
/// Returns `SizeError::InvalidStride` when dimensions are zero, a stride
/// is smaller than a row, or either buffer is too short.
pub fn rgba16_to_rgba1010102_strided(
    src: &[u16],
    dst: &mut [u8],
    width: usize,
    height: usize,
    src_stride: usize,
    dst_stride: usize,
) -> Result<(), SizeError> {
    // Both buffers use 4 elements per pixel in their own units.
    check_strided_bytes(src.len(), width, height, src_stride, 4)?;
    check_strided_bytes(dst.len(), width, height, dst_stride, 4)
        .map(|()| rgba16_to_rgba1010102_strided_impl(src, dst, width, height, src_stride, dst_stride))
}
#[cfg(test)]
mod tests {
    // `alloc` is pulled in explicitly so `vec!` works even if the crate
    // itself is no_std.
    extern crate alloc;
    use super::*;
    use alloc::vec;

    // All four 2-bit alpha codes expand to evenly spaced 10-bit values;
    // wider inputs are masked to their low 2 bits.
    #[test]
    fn expand2_to_10_table() {
        assert_eq!(expand2_to_10(0b00), 0);
        assert_eq!(expand2_to_10(0b01), 0b01_0101_0101);
        assert_eq!(expand2_to_10(0b10), 0b10_1010_1010);
        assert_eq!(expand2_to_10(0b11), 0b11_1111_1111);
        assert_eq!(expand2_to_10(0xFFFF_FFFC), 0);
        assert_eq!(expand2_to_10(0xFFFF_FFFF), 1023);
    }

    // The 10->2 compression maps the extremes exactly.
    #[test]
    fn compress_10_to_2_endpoints() {
        assert_eq!(compress_10_to_2(0), 0);
        assert_eq!(compress_10_to_2(1023), 3);
    }

    // compress must be a left inverse of expand for every legal 2-bit
    // value, so alpha round-trips losslessly.
    #[test]
    fn compress_inverts_expand_for_legal_values() {
        for a2 in 0..=3u32 {
            let expanded = expand2_to_10(a2);
            assert_eq!(
                compress_10_to_2(expanded),
                a2,
                "compress(expand({a2})) = compress({expanded}) != {a2}"
            );
        }
    }

    // Values just below/above the halfway point between levels 1 and 2
    // must round to the nearer level.
    #[test]
    fn compress_rounds_to_nearest() {
        assert_eq!(compress_10_to_2(341), 1);
        assert_eq!(compress_10_to_2(511), 1);
        assert_eq!(compress_10_to_2(512), 2);
    }

    // Hand-computed packed words exercising each channel's bit range
    // (R bits 0..10, G 10..20, B 20..30, A 30..32, little-endian).
    #[test]
    fn unpack_known_values_rgba() {
        let mut dst = [0u16; 4];
        rgba1010102_to_rgba16(&[0u8; 4], &mut dst).unwrap();
        assert_eq!(dst, [0, 0, 0, 0]);
        rgba1010102_to_rgba16(&[0xFF, 0x03, 0x00, 0x00], &mut dst).unwrap();
        assert_eq!(dst, [1023, 0, 0, 0]);
        rgba1010102_to_rgba16(&[0x00, 0xFC, 0x0F, 0x00], &mut dst).unwrap();
        assert_eq!(dst, [0, 1023, 0, 0]);
        rgba1010102_to_rgba16(&[0x00, 0x00, 0xF0, 0x3F], &mut dst).unwrap();
        assert_eq!(dst, [0, 0, 1023, 0]);
        rgba1010102_to_rgba16(&[0x00, 0x00, 0x00, 0xC0], &mut dst).unwrap();
        assert_eq!(dst, [0, 0, 0, 1023]);
        rgba1010102_to_rgba16(&[0xFF, 0xFF, 0xFF, 0xFF], &mut dst).unwrap();
        assert_eq!(dst, [1023, 1023, 1023, 1023]);
    }

    // Mirror of unpack_known_values_rgba: per-channel maxima pack back
    // to the same hand-computed byte patterns.
    #[test]
    fn pack_known_values_rgba() {
        let mut dst = [0u8; 4];
        rgba16_to_rgba1010102(&[1023, 0, 0, 0], &mut dst).unwrap();
        assert_eq!(dst, [0xFF, 0x03, 0x00, 0x00]);
        rgba16_to_rgba1010102(&[0, 1023, 0, 0], &mut dst).unwrap();
        assert_eq!(dst, [0x00, 0xFC, 0x0F, 0x00]);
        rgba16_to_rgba1010102(&[0, 0, 1023, 0], &mut dst).unwrap();
        assert_eq!(dst, [0x00, 0x00, 0xF0, 0x3F]);
        rgba16_to_rgba1010102(&[0, 0, 0, 1023], &mut dst).unwrap();
        assert_eq!(dst, [0x00, 0x00, 0x00, 0xC0]);
        rgba16_to_rgba1010102(&[1023, 1023, 1023, 1023], &mut dst).unwrap();
        assert_eq!(dst, [0xFF, 0xFF, 0xFF, 0xFF]);
    }

    // Minimal deterministic PRNG (64-bit LCG) so the tests need no
    // external crates and reproduce exactly across runs.
    struct Lcg(u64);
    impl Lcg {
        fn new(seed: u64) -> Self {
            Self(seed.wrapping_mul(0x9E37_79B9_7F4A_7C15) ^ 0xDEAD_BEEF_CAFE_F00D)
        }
        fn next_u32(&mut self) -> u32 {
            self.0 = self
                .0
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            // High bits of an LCG state are the best-distributed ones.
            (self.0 >> 32) as u32
        }
    }

    // Any packed pixel must survive unpack -> pack unchanged (all 32 bits,
    // including the 2-bit alpha, are recoverable).
    #[test]
    fn forward_round_trip_random_packed() {
        let mut rng = Lcg::new(0xA110_1010_2222_3333);
        let n_pixels = 4096;
        let mut packed = vec![0u8; n_pixels * 4];
        for chunk in packed.chunks_exact_mut(4) {
            chunk.copy_from_slice(&rng.next_u32().to_le_bytes());
        }
        let mut chans = vec![0u16; n_pixels * 4];
        rgba1010102_to_rgba16(&packed, &mut chans).unwrap();
        let mut repacked = vec![0u8; n_pixels * 4];
        rgba16_to_rgba1010102(&chans, &mut repacked).unwrap();
        assert_eq!(packed, repacked);
    }

    // Channel values that start as expanded alpha codes round-trip
    // pack -> unpack exactly; color channels are sampled across the range.
    #[test]
    fn forward_round_trip_all_alpha_values() {
        let samples_per_chan: [u16; 8] = [0, 1, 256, 511, 512, 768, 1022, 1023];
        for &r in &samples_per_chan {
            for &g in &samples_per_chan {
                for &b in &samples_per_chan {
                    for a in 0..=3u32 {
                        let a10 = expand2_to_10(a);
                        let chans = [r, g, b, a10];
                        let mut packed = [0u8; 4];
                        rgba16_to_rgba1010102(&chans, &mut packed).unwrap();
                        let mut back = [0u16; 4];
                        rgba1010102_to_rgba16(&packed, &mut back).unwrap();
                        assert_eq!(
                            back, chans,
                            "round-trip failed for {chans:?} -> {packed:?} -> {back:?}"
                        );
                    }
                }
            }
        }
    }

    // Random sample over the full 32-bit packed space: unpack -> pack
    // must reproduce the original word.
    #[test]
    fn reverse_round_trip_all_packed_values_sample() {
        let mut rng = Lcg::new(0xBEEF_F00D_F00D_BEEF);
        for _ in 0..65536 {
            let v = rng.next_u32();
            let bytes = v.to_le_bytes();
            let mut chans = [0u16; 4];
            rgba1010102_to_rgba16(&bytes, &mut chans).unwrap();
            let mut back = [0u8; 4];
            rgba16_to_rgba1010102(&chans, &mut back).unwrap();
            assert_eq!(back, bytes, "x = 0x{v:08X}");
        }
    }

    // Empty input is rejected (NotPixelAligned) rather than silently Ok.
    #[test]
    fn zero_length_rejected() {
        let mut dst = [0u16; 0];
        assert_eq!(
            rgba1010102_to_rgba16(&[], &mut dst),
            Err(SizeError::NotPixelAligned)
        );
        let mut dst_bytes = [0u8; 0];
        assert_eq!(
            rgba16_to_rgba1010102(&[], &mut dst_bytes),
            Err(SizeError::NotPixelAligned)
        );
    }

    // Source lengths that are not a multiple of one pixel are rejected.
    #[test]
    fn unaligned_lengths_rejected() {
        let mut dst = [0u16; 4];
        assert_eq!(
            rgba1010102_to_rgba16(&[0, 0, 0], &mut dst),
            Err(SizeError::NotPixelAligned)
        );
        let mut dst_bytes = [0u8; 4];
        assert_eq!(
            rgba16_to_rgba1010102(&[0u16, 0, 0], &mut dst_bytes),
            Err(SizeError::NotPixelAligned)
        );
    }

    // A destination one element short of the required size is rejected.
    #[test]
    fn dst_too_small_rejected() {
        let src = [0u8; 8];
        let mut dst = [0u16; 7];
        assert_eq!(
            rgba1010102_to_rgba16(&src, &mut dst),
            Err(SizeError::PixelCountMismatch)
        );
        let src_chans = [0u16; 8];
        let mut dst_bytes = [0u8; 7];
        assert_eq!(
            rgba16_to_rgba1010102(&src_chans, &mut dst_bytes),
            Err(SizeError::PixelCountMismatch)
        );
    }

    // The minimum valid buffer (exactly one pixel) converts correctly.
    #[test]
    fn single_pixel_works() {
        let src = [0xFF, 0xFF, 0xFF, 0xFF];
        let mut dst = [0u16; 4];
        rgba1010102_to_rgba16(&src, &mut dst).unwrap();
        assert_eq!(dst, [1023, 1023, 1023, 1023]);
    }

    // Channel values above 10 bits are masked (not clamped) when packing;
    // 0xFFFF masks to 0x3FF, producing an all-ones packed word.
    #[test]
    fn pack_clamps_via_mask() {
        let mut dst = [0u8; 4];
        rgba16_to_rgba1010102(&[0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF], &mut dst).unwrap();
        assert_eq!(dst, [0xFF, 0xFF, 0xFF, 0xFF]);
    }

    // The documented expansion table (0, 341, 682, 1023) as observed
    // through the full public unpack path.
    #[test]
    fn alpha_2bit_expansion_documented_values() {
        let cases = [(0u8, 0u16), (1, 341), (2, 682), (3, 1023)];
        for (a2, want) in cases {
            let v = (a2 as u32) << 30;
            let bytes = v.to_le_bytes();
            let mut dst = [0u16; 4];
            rgba1010102_to_rgba16(&bytes, &mut dst).unwrap();
            assert_eq!(dst[3], want, "alpha {a2}");
        }
    }

    // Strided unpack must match the contiguous path row-by-row and must
    // not write into destination stride padding.
    #[test]
    fn strided_unpack_matches_contiguous() {
        let width = 7;
        let height = 5;
        // Deliberately unequal, non-pixel-multiple padding on both sides.
        let src_stride_bytes = width * 4 + 6;
        let dst_stride_u16 = width * 4 + 3;
        let mut src = vec![0u8; src_stride_bytes * height];
        for y in 0..height {
            for x in 0..width {
                let v = (y as u32 * 1000 + x as u32 * 7).wrapping_mul(0xDEAD);
                src[y * src_stride_bytes + x * 4..][..4].copy_from_slice(&v.to_le_bytes());
            }
        }
        let mut dst = vec![0u16; dst_stride_u16 * height];
        rgba1010102_to_rgba16_strided(
            &src,
            &mut dst,
            width,
            height,
            src_stride_bytes,
            dst_stride_u16,
        )
        .unwrap();
        for y in 0..height {
            let s_row = &src[y * src_stride_bytes..][..width * 4];
            let mut want = vec![0u16; width * 4];
            rgba1010102_to_rgba16(s_row, &mut want).unwrap();
            assert_eq!(
                &dst[y * dst_stride_u16..][..width * 4],
                want.as_slice(),
                "row {y}"
            );
            // Padding elements were zero-initialized and must stay zero.
            for &p in &dst[y * dst_stride_u16 + width * 4..(y + 1) * dst_stride_u16] {
                assert_eq!(p, 0, "padding mutated in row {y}");
            }
        }
    }

    // Strided pack must match the contiguous path row-by-row.
    #[test]
    fn strided_pack_matches_contiguous() {
        let width = 3;
        let height = 4;
        let src_stride_u16 = width * 4 + 5;
        let dst_stride_bytes = width * 4 + 7;
        let mut src = vec![0u16; src_stride_u16 * height];
        for y in 0..height {
            for x in 0..width {
                let base = (y * 100 + x * 11) as u16;
                let off = y * src_stride_u16 + x * 4;
                src[off] = base & 0x3FF;
                src[off + 1] = (base.wrapping_mul(3)) & 0x3FF;
                src[off + 2] = (base.wrapping_mul(5)) & 0x3FF;
                // Alpha uses an expanded 2-bit code so it is exactly
                // representable after packing.
                src[off + 3] = expand2_to_10((base as u32) & 0x3);
            }
        }
        let mut dst = vec![0u8; dst_stride_bytes * height];
        rgba16_to_rgba1010102_strided(
            &src,
            &mut dst,
            width,
            height,
            src_stride_u16,
            dst_stride_bytes,
        )
        .unwrap();
        for y in 0..height {
            let s_row = &src[y * src_stride_u16..][..width * 4];
            let mut want = vec![0u8; width * 4];
            rgba16_to_rgba1010102(s_row, &mut want).unwrap();
            assert_eq!(
                &dst[y * dst_stride_bytes..][..width * 4],
                want.as_slice(),
                "row {y}"
            );
        }
    }

    // Zero width, a stride smaller than one row, and a buffer too short
    // for the requested height are all InvalidStride.
    #[test]
    fn strided_invalid_dimensions_rejected() {
        let mut dst = [0u16; 16];
        assert_eq!(
            rgba1010102_to_rgba16_strided(&[0u8; 16], &mut dst, 0, 1, 4, 4),
            Err(SizeError::InvalidStride)
        );
        assert_eq!(
            rgba1010102_to_rgba16_strided(&[0u8; 16], &mut dst, 4, 1, 8, 16),
            Err(SizeError::InvalidStride)
        );
        assert_eq!(
            rgba1010102_to_rgba16_strided(&[0u8; 16], &mut dst, 1, 5, 4, 4),
            Err(SizeError::InvalidStride)
        );
    }

    // Full round trip through the strided paths with three different
    // strides: pixel data must survive, independent of padding layout.
    #[test]
    fn round_trip_via_strided() {
        let width = 5;
        let height = 3;
        let src_stride_bytes = width * 4 + 2;
        let intermediate_stride_u16 = width * 4 + 4;
        let dst_stride_bytes = width * 4 + 6;
        let mut rng = Lcg::new(7);
        let mut src = vec![0u8; src_stride_bytes * height];
        for y in 0..height {
            for x in 0..width {
                let off = y * src_stride_bytes + x * 4;
                src[off..off + 4].copy_from_slice(&rng.next_u32().to_le_bytes());
            }
        }
        let mut chans = vec![0u16; intermediate_stride_u16 * height];
        rgba1010102_to_rgba16_strided(
            &src,
            &mut chans,
            width,
            height,
            src_stride_bytes,
            intermediate_stride_u16,
        )
        .unwrap();
        let mut back = vec![0u8; dst_stride_bytes * height];
        rgba16_to_rgba1010102_strided(
            &chans,
            &mut back,
            width,
            height,
            intermediate_stride_u16,
            dst_stride_bytes,
        )
        .unwrap();
        for y in 0..height {
            assert_eq!(
                &back[y * dst_stride_bytes..][..width * 4],
                &src[y * src_stride_bytes..][..width * 4],
                "row {y}"
            );
        }
    }
}