use std::arch::wasm32::*;
/// Stores the lowest `CN` bytes of `in_regi` to `dst_ptr`.
///
/// Only the first 32-bit lane of `in_regi` is consulted. `CN` values of 2, 3
/// and 4 store that many bytes; any other `CN` stores a single byte.
///
/// # Safety
///
/// `dst_ptr` must be valid for writes of the number of bytes selected by `CN`.
/// No alignment is required.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn w_store_u8x8_m4<const CN: usize>(dst_ptr: *mut u8, in_regi: v128) {
    // Little-endian byte image of lane 0; wasm32 memory is little-endian, so
    // this is exactly the byte order a direct integer store would produce.
    let lane_bytes = (i32x4_extract_lane::<0>(in_regi) as u32).to_le_bytes();
    let byte_count = match CN {
        2..=4 => CN,
        _ => 1,
    };
    // SAFETY: the caller guarantees `dst_ptr` is writable for `byte_count`
    // bytes, `lane_bytes` holds 4 bytes (>= `byte_count`), and a local array
    // cannot overlap the caller's destination.
    unsafe {
        std::ptr::copy_nonoverlapping(lane_bytes.as_ptr(), dst_ptr, byte_count);
    }
}
/// Packs two vectors of 32-bit lanes into one vector of 16-bit lanes by
/// truncation (no saturation).
///
/// The low 16 bits of each 32-bit lane of `a` fill result lanes 0..=3, and the
/// low 16 bits of each 32-bit lane of `b` fill result lanes 4..=7.
#[inline]
#[target_feature(enable = "simd128")]
pub fn u32x4_pack_trunc_u16x8(a: v128, b: v128) -> v128 {
    // Viewed as 16-bit lanes, the low half of every 32-bit lane sits at an
    // even index: 0..=7 address `a`, 8..=15 address `b`.
    i16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a, b)
}
/// Packs two vectors of 16-bit lanes into one vector of 8-bit lanes by
/// truncation (no saturation).
///
/// The low byte of each 16-bit lane of `a` fills result bytes 0..=7, and the
/// low byte of each 16-bit lane of `b` fills result bytes 8..=15.
#[inline]
#[target_feature(enable = "simd128")]
#[allow(dead_code)]
pub fn u16x8_pack_trunc_u8x16(a: v128, b: v128) -> v128 {
    // Even byte indices are the low bytes of the 16-bit lanes:
    // 0..=15 address `a`, 16..=31 address `b`.
    i8x16_shuffle::<
        0, 2, 4, 6, 8, 10, 12, 14, // from `a`
        16, 18, 20, 22, 24, 26, 28, 30, // from `b`
    >(a, b)
}
/// Packs two vectors of 64-bit lanes into one vector of 32-bit lanes by
/// truncation (no saturation).
///
/// The low 32 bits of each 64-bit lane of `a` fill result lanes 0 and 1, and
/// the low 32 bits of each 64-bit lane of `b` fill result lanes 2 and 3.
#[inline]
#[target_feature(enable = "simd128")]
#[allow(dead_code)]
pub fn i32x4_pack_trunc_i64x2(a: v128, b: v128) -> v128 {
    // Viewed as 32-bit lanes, the low half of every 64-bit lane sits at an
    // even index: 0..=3 address `a`, 4..=7 address `b`.
    i32x4_shuffle::<0, 2, 4, 6>(a, b)
}
/// Loads `CN` bytes from `ptr` and zero-extends each one into its own 32-bit
/// lane; lanes that receive no byte are zero.
///
/// `CN` values of 2, 3 and 4 read that many bytes; any other `CN` reads a
/// single byte.
///
/// # Safety
///
/// `ptr` must be valid for reads of the number of bytes selected by `CN`.
/// No alignment is required.
#[inline]
#[target_feature(enable = "simd128")]
pub unsafe fn load_u8_s32_fast<const CN: usize>(ptr: *const u8) -> v128 {
    let zeroed = i32x4_splat(0);
    // SAFETY: the caller guarantees `ptr` is readable for the bytes touched by
    // the selected arm, and every read is explicitly unaligned.
    let packed = unsafe {
        match CN {
            4 => i32x4_replace_lane::<0>(zeroed, ptr.cast::<i32>().read_unaligned()),
            3 => {
                // Two bytes as one 16-bit lane, then the third byte on its own.
                let low_pair =
                    i16x8_replace_lane::<0>(zeroed, ptr.cast::<i16>().read_unaligned());
                u8x16_replace_lane::<2>(low_pair, ptr.add(2).read_unaligned())
            }
            2 => i16x8_replace_lane::<0>(zeroed, ptr.cast::<i16>().read_unaligned()),
            _ => u8x16_replace_lane::<0>(zeroed, ptr.read_unaligned()),
        }
    };
    // Widen u8 -> u16 -> u32; only the low four bytes survive both steps.
    u32x4_extend_low_u16x8(u16x8_extend_low_u8x16(packed))
}
/// Multiplies every 32-bit lane of `v` by three, wrapping on overflow.
#[inline]
#[target_feature(enable = "simd128")]
pub fn i32x4_mul_by_3(v: v128) -> v128 {
    // 3 * v == v + (v << 1); uses a shift and an add rather than a multiply.
    let doubled = i32x4_shl(v, 1);
    i32x4_add(v, doubled)
}