use super::row_stream::RowStream;
use archmage::autoversion;
/// Result of a palette scan over an image's rows.
///
/// The scanners in this file bucket each 3-byte pixel by the top five bits of
/// every channel byte (`>> 3`), giving 32 768 possible buckets, and report how
/// many of those buckets were hit. The all-zero / `false` `Default` value is
/// what the scanners return when the image width or height is zero.
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct PaletteStats {
    /// Number of distinct color buckets seen by `scan_palette`.
    /// `scan_palette_quick` always leaves this at 0.
    pub distinct: u32,
    /// Bit width chosen by `width_for_count`: 2, 4 or 8, or 0 when the bucket
    /// count is above 256.
    pub indexed_width: u32,
    /// `true` when no more than 256 distinct buckets were found.
    pub fits_in_256: bool,
    /// `width * height` when `scan_palette` was asked for grayscale
    /// statistics; 0 otherwise.
    pub total_pixels: u64,
    /// Number of pixels whose largest and smallest channel bytes differ by
    /// more than 4; 0 when grayscale statistics were not requested.
    pub non_grayscale: u64,
}
/// Maps a distinct-color count to an index bit width.
///
/// Returns 2 for up to 4 colors, 4 for up to 16, 8 for up to 256, and 0 when
/// the count is above 256. A count of zero also maps to 2.
#[inline]
fn width_for_count(count: u32) -> u32 {
    match count {
        0..=4 => 2,
        5..=16 => 4,
        17..=256 => 8,
        _ => 0,
    }
}
/// Scans every row of `stream` and counts its distinct quantized colors.
///
/// When `want_grayscale` is `true` the result also carries the total pixel
/// count (`width * height`) and the number of non-grayscale pixels; otherwise
/// both of those fields are 0.
///
/// Returns `PaletteStats::default()` when the stream's width or height is zero.
pub(crate) fn scan_palette(stream: &mut RowStream<'_>, want_grayscale: bool) -> PaletteStats {
    let cols = stream.width() as usize;
    let rows = stream.height();
    if cols == 0 || rows == 0 {
        return PaletteStats::default();
    }

    // One zeroed byte per color bucket, allocated on the heap. The slice
    // length equals the array length, so the conversion is expected to succeed.
    let zeroed: Box<[u8]> = vec![0u8; 32_768].into_boxed_slice();
    let mut flags: Box<[u8; 32_768]> = zeroed
        .try_into()
        .expect("32 KB heap alloc for palette flag array");

    let (distinct, non_grayscale) = scan_and_count(stream, &mut flags, want_grayscale);

    // The pixel total is only reported alongside the grayscale statistics.
    let total_pixels = if want_grayscale {
        (cols as u64) * (rows as u64)
    } else {
        0
    };

    PaletteStats {
        distinct,
        indexed_width: width_for_count(distinct),
        fits_in_256: distinct <= 256,
        total_pixels,
        non_grayscale,
    }
}
/// Early-exit variant of the palette scan: only answers whether the image
/// stays within 256 distinct quantized colors and, if so, which index width
/// fits.
///
/// `distinct`, `total_pixels` and `non_grayscale` are always 0 in the result.
/// Returns `PaletteStats::default()` when the stream's width or height is zero.
pub(crate) fn scan_palette_quick(stream: &mut RowStream<'_>) -> PaletteStats {
    let cols = stream.width() as usize;
    let rows = stream.height();
    if cols == 0 || rows == 0 {
        return PaletteStats::default();
    }

    // One zeroed byte per color bucket, allocated on the heap. The slice
    // length equals the array length, so the conversion is expected to succeed.
    let zeroed: Box<[u8]> = vec![0u8; 32_768].into_boxed_slice();
    let mut flags: Box<[u8; 32_768]> = zeroed
        .try_into()
        .expect("32 KB heap alloc for palette flag array");

    let (seen, exceeded) = scan_quick_inner(stream, &mut flags);
    let indexed_width = if exceeded { 0 } else { width_for_count(seen) };

    PaletteStats {
        // The bucket count itself is not reported by the quick scan.
        distinct: 0,
        indexed_width,
        fits_in_256: !exceeded,
        total_pixels: 0,
        non_grayscale: 0,
    }
}
/// Dispatches to the counting kernel that matches `want_gray`.
///
/// Returns `(distinct_buckets, non_gray_pixels)`; the second element is 0
/// when grayscale statistics were not requested.
fn scan_and_count(
    stream: &mut RowStream<'_>,
    flags: &mut [u8; 32_768],
    want_gray: bool,
) -> (u32, u64) {
    if !want_gray {
        let distinct = scan_and_count_no_gray(stream, flags);
        return (distinct, 0);
    }
    scan_and_count_gray(stream, flags)
}
/// Marks the color bucket of every pixel in `stream` and returns the sum of
/// all entries in `flags` afterwards.
///
/// Each pixel is three bytes; the bucket index packs the top five bits of
/// each byte as `(b0 >> 3) << 10 | (b1 >> 3) << 5 | (b2 >> 3)`. Rows are
/// clamped to `width * 3` bytes (or the row length, if shorter), and trailing
/// bytes that do not form a whole pixel are ignored.
#[autoversion(v4x, v4, v3, neon, scalar)]
fn scan_and_count_no_gray(stream: &mut RowStream<'_>, flags: &mut [u8; 32_768]) -> u32 {
    let cols = stream.width() as usize;
    let rows = stream.height();
    let used_bytes = cols * 3;

    for y in 0..rows {
        let line = stream.borrow_row(y);
        let line = &line[..used_bytes.min(line.len())];

        // Whole 8-pixel (24-byte) blocks first, viewed as fixed-size arrays.
        // NOTE(review): the fixed block shape presumably helps unrolling /
        // vectorization under `autoversion` — confirm before simplifying.
        let mut blocks = line.chunks_exact(24);
        for block in blocks.by_ref() {
            let block: &[u8; 24] = block.try_into().unwrap();
            for p in 0..8 {
                let r = usize::from(block[3 * p] >> 3);
                let g = usize::from(block[3 * p + 1] >> 3);
                let b = usize::from(block[3 * p + 2] >> 3);
                flags[(r << 10) | (g << 5) | b] = 1;
            }
        }

        // Remaining whole pixels that did not fill a block.
        for px in blocks.remainder().chunks_exact(3) {
            let r = usize::from(px[0] >> 3);
            let g = usize::from(px[1] >> 3);
            let b = usize::from(px[2] >> 3);
            flags[(r << 10) | (g << 5) | b] = 1;
        }
    }

    flags.iter().map(|&flag| u32::from(flag)).sum()
}
/// Marks the color bucket of every pixel in `stream` and, in the same pass,
/// counts pixels that are not close to gray.
///
/// A pixel counts as non-gray when the difference between its largest and
/// smallest channel byte is greater than 4. Bucket indices pack the top five
/// bits of each byte as `(r >> 3) << 10 | (g >> 3) << 5 | (b >> 3)`.
///
/// Returns `(sum of all entries in flags, non_gray_pixel_count)`. Rows are
/// clamped to `width * 3` bytes (or the row length, if shorter), and trailing
/// bytes that do not form a whole pixel are ignored.
#[autoversion(v4x, v4, v3, neon, scalar)]
fn scan_and_count_gray(stream: &mut RowStream<'_>, flags: &mut [u8; 32_768]) -> (u32, u64) {
    let cols = stream.width() as usize;
    let rows = stream.height();
    let used_bytes = cols * 3;
    let mut colorful: u64 = 0;

    for y in 0..rows {
        let line = stream.borrow_row(y);
        let line = &line[..used_bytes.min(line.len())];

        // Whole 8-pixel (24-byte) blocks first, viewed as fixed-size arrays.
        // NOTE(review): the fixed block shape presumably helps unrolling /
        // vectorization under `autoversion` — confirm before simplifying.
        let mut blocks = line.chunks_exact(24);
        for block in blocks.by_ref() {
            let block: &[u8; 24] = block.try_into().unwrap();
            for p in 0..8 {
                let (r, g, b) = (block[3 * p], block[3 * p + 1], block[3 * p + 2]);
                let bucket =
                    (usize::from(r >> 3) << 10) | (usize::from(g >> 3) << 5) | usize::from(b >> 3);
                flags[bucket] = 1;
                // hi >= lo, so the u8 subtraction cannot underflow.
                let hi = r.max(g).max(b);
                let lo = r.min(g).min(b);
                colorful += u64::from(hi - lo > 4);
            }
        }

        // Remaining whole pixels that did not fill a block.
        for px in blocks.remainder().chunks_exact(3) {
            let (r, g, b) = (px[0], px[1], px[2]);
            let bucket =
                (usize::from(r >> 3) << 10) | (usize::from(g >> 3) << 5) | usize::from(b >> 3);
            flags[bucket] = 1;
            let hi = r.max(g).max(b);
            let lo = r.min(g).min(b);
            colorful += u64::from(hi - lo > 4);
        }
    }

    let distinct: u32 = flags.iter().map(|&flag| u32::from(flag)).sum();
    (distinct, colorful)
}
/// Counts newly seen color buckets and stops as soon as the count passes 256.
///
/// A bucket is counted the first time its entry in `flags` is found to be 0;
/// the entry is then set to 1. Bucket indices pack the top five bits of each
/// pixel byte as `(b0 >> 3) << 10 | (b1 >> 3) << 5 | (b2 >> 3)`.
///
/// Returns `(count, true)` immediately once `count` exceeds 256 (the rest of
/// the image is not visited), or `(count, false)` after all rows have been
/// scanned. Rows are clamped to `width * 3` bytes (or the row length, if
/// shorter), and trailing bytes that do not form a whole pixel are ignored.
#[autoversion(v4x, v4, v3, neon, scalar)]
fn scan_quick_inner(stream: &mut RowStream<'_>, flags: &mut [u8; 32_768]) -> (u32, bool) {
    let cols = stream.width() as usize;
    let rows = stream.height();
    let used_bytes = cols * 3;
    let mut seen: u32 = 0;

    for y in 0..rows {
        let line = stream.borrow_row(y);
        let line = &line[..used_bytes.min(line.len())];

        // Whole 8-pixel (24-byte) blocks first, viewed as fixed-size arrays.
        let mut blocks = line.chunks_exact(24);
        for block in blocks.by_ref() {
            let block: &[u8; 24] = block.try_into().unwrap();
            for p in 0..8 {
                let r = usize::from(block[3 * p] >> 3);
                let g = usize::from(block[3 * p + 1] >> 3);
                let b = usize::from(block[3 * p + 2] >> 3);
                let bucket = (r << 10) | (g << 5) | b;
                if flags[bucket] != 0 {
                    continue;
                }
                flags[bucket] = 1;
                seen += 1;
                if seen > 256 {
                    return (seen, true);
                }
            }
        }

        // Remaining whole pixels that did not fill a block.
        for px in blocks.remainder().chunks_exact(3) {
            let r = usize::from(px[0] >> 3);
            let g = usize::from(px[1] >> 3);
            let b = usize::from(px[2] >> 3);
            let bucket = (r << 10) | (g << 5) | b;
            if flags[bucket] != 0 {
                continue;
            }
            flags[bucket] = 1;
            seen += 1;
            if seen > 256 {
                return (seen, true);
            }
        }
    }

    (seen, false)
}