use crate::wasm32::utils::{wasm_unpackhi_i8x16, wasm_unpacklo_i8x16};
use std::arch::wasm32::*;
/// Splits the 32 bytes held in `a` followed by `b` into two vectors: the
/// first collects the bytes at even positions (0, 2, 4, ..., 30), the second
/// the bytes at odd positions (1, 3, 5, ..., 31).
///
/// Shuffle indices 0..=15 select lanes of `a`, indices 16..=31 select lanes
/// of `b`.
///
/// # Safety
///
/// The body only shuffles lanes of its arguments and does not touch memory.
/// NOTE(review): presumably `unsafe` because callers must ensure `simd128`
/// is available on the target — confirm.
#[inline]
pub(crate) unsafe fn v128_deinterleave_u8_x2(a: v128, b: v128) -> (v128, v128) {
    (
        // Even-positioned bytes of the concatenation `a ++ b`.
        u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b),
        // Odd-positioned bytes of the concatenation `a ++ b`.
        u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(a, b),
    )
}
/// Reads 32 bytes starting at `ptr` (two consecutive 16-byte vectors) and
/// deinterleaves them via `v128_deinterleave_u8_x2`, returning the
/// even-offset bytes first and the odd-offset bytes second.
///
/// # Safety
///
/// `ptr` must be valid for reading 32 bytes.
#[inline]
pub(crate) unsafe fn v128_load_deinterleave_u8_x2(ptr: *const u8) -> (v128, v128) {
    let first_half = v128_load(ptr.cast::<v128>());
    let second_half = v128_load(ptr.add(16).cast::<v128>());
    v128_deinterleave_u8_x2(first_half, second_half)
}
/// Reads 16 bytes starting at `ptr` and deinterleaves them against an
/// all-zero vector via `v128_deinterleave_u8_x2`.
///
/// Because the second input is zero, lanes 0..8 of each returned vector hold
/// the even-offset (first vector) or odd-offset (second vector) bytes read
/// from `ptr`, and lanes 8..16 are zero.
///
/// # Safety
///
/// `ptr` must be valid for reading 16 bytes.
#[inline]
pub(crate) unsafe fn v128_load_deinterleave_half_u8_x2(ptr: *const u8) -> (v128, v128) {
    // Stands in for the missing second vector so the upper lanes come out zero.
    let zeros = u8x16_splat(0);
    let loaded = v128_load(ptr.cast::<v128>());
    v128_deinterleave_u8_x2(loaded, zeros)
}
/// Interleaves four byte vectors and writes the resulting 64 bytes to `ptr`
/// as four consecutive 16-byte stores.
///
/// The interleave runs in two unpack stages: first `packed.0` with `packed.2`
/// and `packed.1` with `packed.3`, then the results of those with each other.
/// NOTE(review): assuming `wasm_unpacklo_i8x16` / `wasm_unpackhi_i8x16` zip
/// the low / high eight bytes of their operands (SSE2 `unpacklo`/`unpackhi`
/// style), the stored order is `a0 b0 c0 d0 a1 b1 c1 d1 ...` — confirm
/// against the helper definitions.
///
/// # Safety
///
/// `ptr` must be valid for writing 64 bytes.
#[inline]
pub(crate) unsafe fn wasm_store_interleave_u8x4(ptr: *mut u8, packed: (v128, v128, v128, v128)) {
    let (a, b, c, d) = packed;

    // Stage 1: pair channel 0 with channel 2, and channel 1 with channel 3.
    let ac_lo = wasm_unpacklo_i8x16(a, c);
    let ac_hi = wasm_unpackhi_i8x16(a, c);
    let bd_lo = wasm_unpacklo_i8x16(b, d);
    let bd_hi = wasm_unpackhi_i8x16(b, d);

    // Stage 2: zip the stage-1 pairs together into the four output vectors.
    let out0 = wasm_unpacklo_i8x16(ac_lo, bd_lo);
    let out1 = wasm_unpackhi_i8x16(ac_lo, bd_lo);
    let out2 = wasm_unpacklo_i8x16(ac_hi, bd_hi);
    let out3 = wasm_unpackhi_i8x16(ac_hi, bd_hi);

    v128_store(ptr.cast::<v128>(), out0);
    v128_store(ptr.add(16).cast::<v128>(), out1);
    v128_store(ptr.add(32).cast::<v128>(), out2);
    v128_store(ptr.add(48).cast::<v128>(), out3);
}
/// Interleaves three byte vectors and writes the resulting 48 bytes to `ptr`
/// as three consecutive 16-byte stores, in the order
/// `a0 b0 c0 a1 b1 c1 ... a15 b15 c15` (where `a`, `b`, `c` are `packed.0`,
/// `packed.1`, `packed.2`).
///
/// The work is done in two shuffle stages:
/// 1. merge `a` and `b` into their final positions, leaving a placeholder
///    lane (filled with `a[0]`) wherever a byte of `c` belongs;
/// 2. overwrite each placeholder with the matching byte of `c`.
///
/// # Safety
///
/// `ptr` must be valid for writing 48 bytes.
#[inline]
pub(crate) unsafe fn wasm_store_interleave_u8x3(ptr: *mut u8, packed: (v128, v128, v128)) {
    let (a, b, c) = packed;

    // Stage 1: indices 0..=15 pick from `a`, 16..=31 from `b`; the repeated
    // index 0 marks the slots that stage 2 fills from `c`.
    let ab_first = u8x16_shuffle::<0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5>(a, b);
    let ab_second = u8x16_shuffle::<21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26>(a, b);
    let ab_third = u8x16_shuffle::<0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0>(a, b);

    // Stage 2: indices 0..=15 keep the stage-1 lane, 16..=31 insert from `c`.
    let out_first =
        u8x16_shuffle::<0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15>(ab_first, c);
    let out_second =
        u8x16_shuffle::<0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15>(ab_second, c);
    let out_third =
        u8x16_shuffle::<26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31>(ab_third, c);

    v128_store(ptr.cast::<v128>(), out_first);
    v128_store(ptr.add(16).cast::<v128>(), out_second);
    v128_store(ptr.add(32).cast::<v128>(), out_third);
}