use super::PixelArray;
use core::simd::{
SimdPartialOrd,
SimdUint,
simd_swizzle,
usizex8,
u8x4,
u16x4,
u8x32,
u16x32,
};
/// A batch of 32 bytes (eight four-byte pixels) held as 32 `u16` lanes.
///
/// The bytes are widened to `u16` on construction so that per-channel
/// arithmetic, such as the `channel * alpha` products in `blend8`, has
/// headroom beyond the `u8` range.
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct EightPixels(u16x32);
impl EightPixels {
    /// Loads up to 32 bytes from `src`, widening every byte to a `u16` lane.
    ///
    /// Lanes beyond `src.len()` are zero.
    ///
    /// # Panics
    ///
    /// Panics if `src` holds more than 32 bytes.
    pub fn new(src: &[u8]) -> Self {
        let mut padded = [0u8; 32];
        let used = src.len();
        padded[..used].copy_from_slice(src);
        let narrow = u8x32::from_array(padded);
        Self(narrow.cast::<u16>())
    }

    /// Casts every lane back to `u8` and copies the first `dst.len()` bytes
    /// into `dst`.
    ///
    /// # Panics
    ///
    /// Panics if `dst` holds more than 32 bytes.
    pub fn write(&self, dst: &mut [u8]) {
        let bytes = self.0.cast::<u8>().to_array();
        let wanted = dst.len();
        dst.copy_from_slice(&bytes[..wanted]);
    }
}
/// Builds a 32-lane index table that points every lane at byte `byte` of the
/// four-lane group the lane belongs to.
///
/// Lane `n` maps to `byte + 4 * (n / 4)`, so `gen_swizzle(1)` starts
/// `[1, 1, 1, 1, 5, 5, 5, 5, ...]`.
const fn gen_swizzle(byte: usize) -> [usize; 32] {
    let mut table = [0usize; 32];
    let mut lane = 0;
    while lane < 32 {
        // `(lane / 4) * 4` is the first lane of the group containing `lane`.
        table[lane] = byte + (lane / 4) * 4;
        lane += 1;
    }
    table
}
/// Selects which byte of each four-byte pixel `blend8` uses as the alpha
/// value, or disables blending altogether.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(usize)]
pub enum AlphaConfig {
/// Alpha is byte 0 of every pixel.
FirstByte,
/// Alpha is byte 1 of every pixel.
SecondByte,
/// Alpha is byte 2 of every pixel.
ThirdByte,
/// Alpha is byte 3 of every pixel.
FourthByte,
/// No alpha: `blend8` writes the source pixels without blending.
None,
}
/// Blends eight source pixels over the pixels currently stored in `dst` and
/// writes the result back into `dst`.
///
/// With `AlphaConfig::None` the source is written unchanged. Otherwise the
/// byte selected by `alpha_config` is taken from each source pixel as its
/// alpha `a`, and every lane becomes `(src * a + dst * (255 - a)) / 255`
/// using integer division. The alpha byte is blended by the same formula.
///
/// `dst` may hold fewer than 32 bytes: missing bytes are read as zero and
/// only `dst.len()` bytes are written.
///
/// # Panics
///
/// Panics if `dst` holds more than 32 bytes.
#[inline(always)]
pub fn blend8(
    src: EightPixels,
    dst: &mut [u8],
    alpha_config: AlphaConfig,
) {
    // Broadcast each source pixel's alpha byte across that pixel's four lanes.
    // The swizzle index must be a constant, hence one macro call per variant.
    let src_alpha = match alpha_config {
        AlphaConfig::None => {
            src.write(dst);
            return;
        }
        AlphaConfig::FirstByte => simd_swizzle!(src.0, gen_swizzle(0)),
        AlphaConfig::SecondByte => simd_swizzle!(src.0, gen_swizzle(1)),
        AlphaConfig::ThirdByte => simd_swizzle!(src.0, gen_swizzle(2)),
        AlphaConfig::FourthByte => simd_swizzle!(src.0, gen_swizzle(3)),
    };

    let backdrop = EightPixels::new(dst);
    let opaque = u16x32::splat(u16::from(u8::MAX));
    let dst_alpha = opaque - src_alpha;

    // The two weights sum to 255, so the total is at most 255 * 255 and
    // fits in a `u16` lane.
    let foreground = src.0 * src_alpha;
    let background = backdrop.0 * dst_alpha;
    EightPixels((foreground + background) / opaque).write(dst);
}
/// Source sample positions for a batch of eight output pixels.
///
/// Every field holds one vector per sub-sample (`SSAA_SQ` in total), and
/// each vector holds one lane per output pixel in the batch.
pub struct SsaaCoords<const SSAA_SQ: usize> {
// Output slot (index into the eight accumulators in `ssaa8`) for each lane.
src_o: [usizex8; SSAA_SQ],
// Source x coordinate of each sample.
src_x: [usizex8; SSAA_SQ],
// Source y coordinate of each sample.
src_y: [usizex8; SSAA_SQ],
}
impl<const SSAA_SQ: usize> SsaaCoords<SSAA_SQ> {
    /// Creates a coordinate set in which every lane of every field is
    /// `usize::MAX`.
    ///
    /// `usize::MAX` is never less than a source width, so lanes that are not
    /// later filled in with [`set`](Self::set) are skipped by `ssaa8`.
    pub fn new() -> Self {
        let unset = [usizex8::from_array([usize::MAX; 8]); SSAA_SQ];
        Self {
            src_o: unset,
            src_x: unset,
            src_y: unset,
        }
    }

    /// Records that sub-sample `sub_pixel` of output pixel `pixel` reads the
    /// source at (`x`, `y`).
    ///
    /// # Panics
    ///
    /// Panics if `pixel >= 8` or if `sub_pixel >= SSAA_SQ`.
    #[inline(always)]
    pub fn set(&mut self, pixel: usize, sub_pixel: usize, x: usize, y: usize) {
        assert!(pixel < 8);
        let Self { src_o, src_x, src_y } = self;
        src_o[sub_pixel][pixel] = pixel;
        src_x[sub_pixel][pixel] = x;
        src_y[sub_pixel][pixel] = y;
    }
}
/// Averages the supersampled source pixels for a batch of eight output pixels.
///
/// For every sub-sample and every lane, the source pixel at (`x`, `y`) is
/// fetched with `src.get(y * width + x)` and its four bytes are added to the
/// accumulator of the output slot recorded for that lane. A sample is used
/// only when `x < src.width()`, `y < src.height()` and the linear index is
/// below `src.length()`. Lanes still holding the `usize::MAX` placeholder
/// from `SsaaCoords::new` fail the first test and are skipped.
///
/// Each channel sum is then divided by `SSAA_SQ`. Skipped samples add nothing
/// to the sum but still count in the divisor, so a pixel with missing samples
/// is scaled down rather than renormalised.
///
/// # Panics
///
/// Panics with a division by zero if `SSAA_SQ as u16` is zero.
///
/// NOTE(review): the accumulators are `u16` lanes, so more than 257 samples
/// of value 255 for one channel exceed their range. Confirm callers keep
/// `SSAA_SQ` below that.
#[inline(always)]
pub fn ssaa8<P: PixelArray, const SSAA_SQ: usize>(
    src_coords: SsaaCoords<SSAA_SQ>,
    src: &P,
) -> EightPixels {
    let src_w = usizex8::from_array([src.width(); 8]);
    let src_h = usizex8::from_array([src.height(); 8]);
    let src_l = usizex8::from_array([src.length(); 8]);

    // One four-channel accumulator per output pixel.
    let mut src_sum = [u16x4::from_array([0; 4]); 8];
    for i in 0..SSAA_SQ {
        let src_o = src_coords.src_o[i];
        let src_x = src_coords.src_x[i];
        let src_y = src_coords.src_y[i];
        let src_i = src_y * src_w + src_x;

        // Validate all eight lanes at once; only the fetch itself is scalar.
        let usable_x = src_x.simd_lt(src_w);
        let usable_y = src_y.simd_lt(src_h);
        let usable_l = src_i.simd_lt(src_l);
        let usable = (usable_x & usable_y & usable_l).to_array();

        for j in 0..8 {
            if usable[j] {
                let rgba = src.get(src_i[j]).into();
                src_sum[src_o[j]] += u8x4::from_array(rgba).cast();
            }
        }
    }

    // Always divide by the full sub-sample count, including skipped samples.
    let divisor = SSAA_SQ as u16;
    let mut result = u16x32::from_array([0; 32]);
    for (lanes, sum) in result
        .as_mut_array()
        .chunks_exact_mut(4)
        .zip(src_sum.iter())
    {
        let averaged = sum.to_array().map(|channel| channel / divisor);
        lanes.copy_from_slice(&averaged);
    }
    EightPixels(result)
}