use core::arch::wasm32::*;
use super::*;
/// Interleaves 16 planar R, G and B bytes into 16 packed `R, G, B` pixels and
/// writes the resulting 48 bytes to `ptr`.
///
/// Each of the three 16-byte output vectors is assembled by swizzling every
/// input plane with its own index table and OR-ing the results together. A
/// `-1` index is out of range for `u8x16_swizzle` and therefore selects a zero
/// byte, so the swizzled planes never collide in the same output lane.
///
/// # Safety
///
/// `ptr` must be valid for writing 48 bytes. The intrinsics used here need the
/// `simd128` target feature; the callers in this file are all annotated with
/// `#[target_feature(enable = "simd128")]`.
#[inline(always)]
unsafe fn write_rgb_16(r: v128, g: v128, b: v128, ptr: *mut u8) {
    // SAFETY: the caller guarantees `ptr` is writable for 48 bytes, which
    // covers the three 16-byte stores at offsets 0, 16 and 32.
    unsafe {
        // Output bytes 0..16: R0 G0 B0 R1 G1 B1 ... R4 G4 B4 R5.
        let head = {
            let pick_r = i8x16(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5);
            let pick_g = i8x16(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1);
            let pick_b = i8x16(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            v128_or(rg, u8x16_swizzle(b, pick_b))
        };
        v128_store(ptr.cast(), head);

        // Output bytes 16..32: G5 B5 R6 G6 B6 ... R10 G10.
        let middle = {
            let pick_r = i8x16(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1);
            let pick_g = i8x16(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10);
            let pick_b = i8x16(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            v128_or(rg, u8x16_swizzle(b, pick_b))
        };
        v128_store(ptr.add(16).cast(), middle);

        // Output bytes 32..48: B10 R11 G11 B11 ... R15 G15 B15.
        let tail = {
            let pick_r = i8x16(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1);
            let pick_g = i8x16(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1);
            let pick_b = i8x16(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            v128_or(rg, u8x16_swizzle(b, pick_b))
        };
        v128_store(ptr.add(32).cast(), tail);
    }
}
/// Interleaves 16 planar R, G, B and A bytes into 16 packed `R, G, B, A`
/// pixels and writes the resulting 64 bytes to `ptr`.
///
/// Every 16-byte output vector holds four pixels. It is assembled by swizzling
/// each input plane with its own index table and OR-ing the four results. A
/// `-1` index is out of range for `u8x16_swizzle` and therefore selects a zero
/// byte, so the swizzled planes never collide in the same output lane.
///
/// # Safety
///
/// `ptr` must be valid for writing 64 bytes. The intrinsics used here need the
/// `simd128` target feature; the callers in this file are all annotated with
/// `#[target_feature(enable = "simd128")]`.
#[inline(always)]
unsafe fn write_rgba_16(r: v128, g: v128, b: v128, a: v128, ptr: *mut u8) {
    // SAFETY: the caller guarantees `ptr` is writable for 64 bytes, which
    // covers the four 16-byte stores at offsets 0, 16, 32 and 48.
    unsafe {
        // Pixels 0..4.
        let quad0 = {
            let pick_r = i8x16(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1);
            let pick_g = i8x16(-1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1);
            let pick_b = i8x16(-1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1);
            let pick_a = i8x16(-1, -1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            let ba = v128_or(u8x16_swizzle(b, pick_b), u8x16_swizzle(a, pick_a));
            v128_or(rg, ba)
        };
        v128_store(ptr.cast(), quad0);

        // Pixels 4..8.
        let quad1 = {
            let pick_r = i8x16(4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1);
            let pick_g = i8x16(-1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1, -1);
            let pick_b = i8x16(-1, -1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1);
            let pick_a = i8x16(-1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            let ba = v128_or(u8x16_swizzle(b, pick_b), u8x16_swizzle(a, pick_a));
            v128_or(rg, ba)
        };
        v128_store(ptr.add(16).cast(), quad1);

        // Pixels 8..12.
        let quad2 = {
            let pick_r = i8x16(8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1, -1);
            let pick_g = i8x16(-1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1);
            let pick_b = i8x16(-1, -1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1);
            let pick_a = i8x16(-1, -1, -1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            let ba = v128_or(u8x16_swizzle(b, pick_b), u8x16_swizzle(a, pick_a));
            v128_or(rg, ba)
        };
        v128_store(ptr.add(32).cast(), quad2);

        // Pixels 12..16.
        let quad3 = {
            let pick_r = i8x16(12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1, -1);
            let pick_g = i8x16(-1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1);
            let pick_b = i8x16(-1, -1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1);
            let pick_a = i8x16(-1, -1, -1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15);
            let rg = v128_or(u8x16_swizzle(r, pick_r), u8x16_swizzle(g, pick_g));
            let ba = v128_or(u8x16_swizzle(b, pick_b), u8x16_swizzle(a, pick_a));
            v128_or(rg, ba)
        };
        v128_store(ptr.add(48).cast(), quad3);
    }
}
/// Converts one row of planar G, B, R bytes into packed `R, G, B` bytes.
///
/// Whole groups of 16 pixels are interleaved with SIMD via `write_rgb_16`;
/// the remaining `width % 16` pixels, if any, are handed to
/// `scalar::gbr_to_rgb_row`.
///
/// # Safety
///
/// - The `simd128` target feature must be available.
/// - `g`, `b` and `r` must each hold at least `width` bytes and `rgb_out` at
///   least `width * 3` bytes. This is only verified by `debug_assert!`; the
///   SIMD loop accesses memory through raw pointers without bounds checks.
#[inline]
#[target_feature(enable = "simd128")]
pub(crate) unsafe fn gbr_to_rgb_row(
    g: &[u8],
    b: &[u8],
    r: &[u8],
    rgb_out: &mut [u8],
    width: usize,
) {
    debug_assert!(g.len() >= width, "g row too short");
    debug_assert!(b.len() >= width, "b row too short");
    debug_assert!(r.len() >= width, "r row too short");
    debug_assert!(rgb_out.len() >= width * 3, "rgb_out row too short");

    // Number of leading pixels covered by whole 16-pixel SIMD groups.
    let simd_width = width - width % 16;

    // SAFETY: every group starts at `px` with `px + 16 <= simd_width <= width`,
    // so per the caller contract the 16-byte loads stay inside the input
    // planes and the 48-byte store stays inside `rgb_out`.
    unsafe {
        let (g_ptr, b_ptr, r_ptr) = (g.as_ptr(), b.as_ptr(), r.as_ptr());
        let out_ptr = rgb_out.as_mut_ptr();
        for px in (0..simd_width).step_by(16) {
            let green = v128_load(g_ptr.add(px).cast());
            let blue = v128_load(b_ptr.add(px).cast());
            let red = v128_load(r_ptr.add(px).cast());
            write_rgb_16(red, green, blue, out_ptr.add(px * 3));
        }

        // Leftover pixels that do not fill a whole SIMD group.
        if simd_width < width {
            scalar::gbr_to_rgb_row(
                &g[simd_width..width],
                &b[simd_width..width],
                &r[simd_width..width],
                &mut rgb_out[simd_width * 3..width * 3],
                width - simd_width,
            );
        }
    }
}
/// Converts one row of planar G, B, R, A bytes into packed `R, G, B, A` bytes.
///
/// Whole groups of 16 pixels are interleaved with SIMD via `write_rgba_16`;
/// the remaining `width % 16` pixels, if any, are handed to
/// `scalar::gbra_to_rgba_row`.
///
/// # Safety
///
/// - The `simd128` target feature must be available.
/// - `g`, `b`, `r` and `a` must each hold at least `width` bytes and
///   `rgba_out` at least `width * 4` bytes. This is only verified by
///   `debug_assert!`; the SIMD loop accesses memory through raw pointers
///   without bounds checks.
#[inline]
#[target_feature(enable = "simd128")]
pub(crate) unsafe fn gbra_to_rgba_row(
    g: &[u8],
    b: &[u8],
    r: &[u8],
    a: &[u8],
    rgba_out: &mut [u8],
    width: usize,
) {
    debug_assert!(g.len() >= width, "g row too short");
    debug_assert!(b.len() >= width, "b row too short");
    debug_assert!(r.len() >= width, "r row too short");
    debug_assert!(a.len() >= width, "a row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");

    // Number of leading pixels covered by whole 16-pixel SIMD groups.
    let simd_width = width - width % 16;

    // SAFETY: every group starts at `px` with `px + 16 <= simd_width <= width`,
    // so per the caller contract the 16-byte loads stay inside the input
    // planes and the 64-byte store stays inside `rgba_out`.
    unsafe {
        let (g_ptr, b_ptr, r_ptr, a_ptr) = (g.as_ptr(), b.as_ptr(), r.as_ptr(), a.as_ptr());
        let out_ptr = rgba_out.as_mut_ptr();
        for px in (0..simd_width).step_by(16) {
            let green = v128_load(g_ptr.add(px).cast());
            let blue = v128_load(b_ptr.add(px).cast());
            let red = v128_load(r_ptr.add(px).cast());
            let alpha = v128_load(a_ptr.add(px).cast());
            write_rgba_16(red, green, blue, alpha, out_ptr.add(px * 4));
        }

        // Leftover pixels that do not fill a whole SIMD group.
        if simd_width < width {
            scalar::gbra_to_rgba_row(
                &g[simd_width..width],
                &b[simd_width..width],
                &r[simd_width..width],
                &a[simd_width..width],
                &mut rgba_out[simd_width * 4..width * 4],
                width - simd_width,
            );
        }
    }
}
/// Converts one row of planar G, B, R bytes into packed `R, G, B, A` bytes
/// with every alpha byte set to `0xFF`.
///
/// Whole groups of 16 pixels are interleaved with SIMD via `write_rgba_16`,
/// using a splatted `0xFF` vector as the alpha plane; the remaining
/// `width % 16` pixels, if any, are handed to
/// `scalar::gbr_to_rgba_opaque_row`.
///
/// # Safety
///
/// - The `simd128` target feature must be available.
/// - `g`, `b` and `r` must each hold at least `width` bytes and `rgba_out` at
///   least `width * 4` bytes. This is only verified by `debug_assert!`; the
///   SIMD loop accesses memory through raw pointers without bounds checks.
#[inline]
#[target_feature(enable = "simd128")]
pub(crate) unsafe fn gbr_to_rgba_opaque_row(
    g: &[u8],
    b: &[u8],
    r: &[u8],
    rgba_out: &mut [u8],
    width: usize,
) {
    debug_assert!(g.len() >= width, "g row too short");
    debug_assert!(b.len() >= width, "b row too short");
    debug_assert!(r.len() >= width, "r row too short");
    debug_assert!(rgba_out.len() >= width * 4, "rgba_out row too short");

    // Number of leading pixels covered by whole 16-pixel SIMD groups.
    let simd_width = width - width % 16;

    // SAFETY: every group starts at `px` with `px + 16 <= simd_width <= width`,
    // so per the caller contract the 16-byte loads stay inside the input
    // planes and the 64-byte store stays inside `rgba_out`.
    unsafe {
        // Constant alpha plane shared by every group.
        let full_alpha = u8x16_splat(0xFF);
        let (g_ptr, b_ptr, r_ptr) = (g.as_ptr(), b.as_ptr(), r.as_ptr());
        let out_ptr = rgba_out.as_mut_ptr();
        for px in (0..simd_width).step_by(16) {
            let green = v128_load(g_ptr.add(px).cast());
            let blue = v128_load(b_ptr.add(px).cast());
            let red = v128_load(r_ptr.add(px).cast());
            write_rgba_16(red, green, blue, full_alpha, out_ptr.add(px * 4));
        }

        // Leftover pixels that do not fill a whole SIMD group.
        if simd_width < width {
            scalar::gbr_to_rgba_opaque_row(
                &g[simd_width..width],
                &b[simd_width..width],
                &r[simd_width..width],
                &mut rgba_out[simd_width * 4..width * 4],
                width - simd_width,
            );
        }
    }
}