use std::arch::wasm32::{
f32x4_abs, f32x4_add, f32x4_convert_i32x4, f32x4_div, f32x4_eq, f32x4_extract_lane, f32x4_ge,
f32x4_gt, f32x4_le, f32x4_lt, f32x4_max, f32x4_min, f32x4_mul, f32x4_nearest, f32x4_neg,
f32x4_splat, f32x4_sub, i8x16_add, i8x16_all_true, i8x16_eq, i8x16_ge, i8x16_gt, i8x16_neg,
i8x16_shl, i8x16_shr, i8x16_shuffle, i8x16_splat, i8x16_sub, i16x8_add, i16x8_all_true,
i16x8_eq, i16x8_extend_high_i8x16, i16x8_extend_low_i8x16, i16x8_extmul_high_i8x16,
i16x8_extmul_low_i8x16, i16x8_ge, i16x8_gt, i16x8_mul, i16x8_narrow_i32x4, i16x8_neg,
i16x8_shl, i16x8_shr, i16x8_shuffle, i16x8_splat, i16x8_sub, i32x4_add, i32x4_all_true,
i32x4_eq, i32x4_extend_high_i16x8, i32x4_extend_low_i16x8, i32x4_ge, i32x4_gt, i32x4_mul,
i32x4_neg, i32x4_shl, i32x4_shr, i32x4_shuffle, i32x4_splat, i32x4_sub, i32x4_trunc_sat_f32x4,
u8x16_add, u8x16_eq, u8x16_ge, u8x16_gt, u8x16_narrow_i16x8, u8x16_shl, u8x16_shr,
u8x16_shuffle, u8x16_splat, u8x16_sub, u16x8_add, u16x8_eq, u16x8_extend_high_u8x16,
u16x8_extend_low_u8x16, u16x8_extmul_high_u8x16, u16x8_extmul_low_u8x16, u16x8_ge, u16x8_gt,
u16x8_mul, u16x8_shl, u16x8_shr, u16x8_splat, u16x8_sub, v128, v128_and, v128_any_true,
v128_bitselect, v128_load, v128_not, v128_or, v128_store, v128_xor,
};
use std::mem::transmute;
#[cfg(target_feature = "relaxed-simd")]
use std::arch::wasm32::f32x4_relaxed_madd;
use super::{lanes, simd_type};
use crate::ops::{
Concat, Extend, FloatOps, IntOps, Interleave, MaskOps, NarrowSaturate, NumOps, SignedIntOps,
ToFloat,
};
use crate::{Isa, Mask, Simd};
simd_type!(F32x4, v128, f32, M32, Wasm32Isa);
simd_type!(I32x4, v128, i32, M32, Wasm32Isa);
simd_type!(I16x8, v128, i16, M16, Wasm32Isa);
simd_type!(I8x16, v128, i8, M8, Wasm32Isa);
simd_type!(U8x16, v128, u8, M8, Wasm32Isa);
simd_type!(U16x8, v128, u16, M16, Wasm32Isa);
simd_type!(U32x4, v128, u32, M32, Wasm32Isa);
#[derive(Copy, Clone)]
pub struct Wasm32Isa {
_private: (),
}
impl Wasm32Isa {
pub fn new() -> Option<Self> {
Some(Wasm32Isa { _private: () })
}
}
unsafe impl Isa for Wasm32Isa {
type M32 = M32;
type M16 = M16;
type M8 = M8;
type F32 = F32x4;
type I32 = I32x4;
type I16 = I16x8;
type I8 = I8x16;
type U8 = U8x16;
type U16 = U16x8;
type U32 = U32x4;
type Bits = I32x4;
fn f32(self) -> impl FloatOps<f32, Simd = Self::F32, Int = Self::I32> {
self
}
fn i32(
self,
) -> impl SignedIntOps<i32, Simd = Self::I32>
+ NarrowSaturate<i32, i16, Output = Self::I16>
+ Concat<i32>
+ ToFloat<i32, Output = Self::F32> {
self
}
fn i16(
self,
) -> impl SignedIntOps<i16, Simd = Self::I16>
+ NarrowSaturate<i16, u8, Output = Self::U8>
+ Extend<i16, Output = Self::I32>
+ Interleave<i16> {
self
}
fn i8(
self,
) -> impl SignedIntOps<i8, Simd = Self::I8> + Extend<i8, Output = Self::I16> + Interleave<i8>
{
self
}
fn u8(
self,
) -> impl IntOps<u8, Simd = Self::U8> + Extend<u8, Output = Self::U16> + Interleave<u8> {
self
}
fn u16(self) -> impl IntOps<u16, Simd = Self::U16> {
self
}
fn m32(self) -> impl MaskOps<Self::M32> {
self
}
fn m16(self) -> impl MaskOps<Self::M16> {
self
}
fn m8(self) -> impl MaskOps<Self::M8> {
self
}
}
macro_rules! simd_ops_common {
($simd:ident, $mask:ident, $mask_elem:ty) => {
type Simd = $simd;
#[inline]
fn len(self) -> usize {
lanes::<$simd>()
}
#[inline]
fn first_n_mask(self, n: usize) -> $mask {
let mask: [$mask_elem; lanes::<$simd>()] =
std::array::from_fn(|i| if i < n { !0 } else { 0 });
$mask(unsafe { v128_load(mask.as_ptr() as *const v128) })
}
#[inline]
unsafe fn load_ptr(self, ptr: *const <$simd as Simd>::Elem) -> $simd {
$simd(unsafe { v128_load(ptr as *const v128) })
}
#[inline]
unsafe fn store_ptr(self, x: $simd, ptr: *mut <$simd as Simd>::Elem) {
unsafe { v128_store(ptr as *mut v128, x.0) }
}
#[inline]
unsafe fn load_ptr_mask(self, ptr: *const <$simd as Simd>::Elem, mask: $mask) -> $simd {
type Elem = <$simd as Simd>::Elem;
let mask_array = Mask::to_array(mask);
let mut vec = Simd::to_array(NumOps::<Elem>::zero(self));
for i in 0..mask_array.len() {
if mask_array[i] {
vec[i] = *ptr.add(i);
}
}
self.load_ptr(vec.as_ref().as_ptr())
}
#[inline]
unsafe fn store_ptr_mask(
self,
x: $simd,
ptr: *mut <$simd as Simd>::Elem,
mask: <$simd as Simd>::Mask,
) {
type Elem = <$simd as Simd>::Elem;
let mask_array = Mask::to_array(mask);
let x_array = Simd::to_array(x);
for i in 0..NumOps::<Elem>::len(self) {
if mask_array[i] {
*ptr.add(i) = x_array[i];
}
}
}
#[inline]
fn select(self, x: $simd, y: $simd, mask: <$simd as Simd>::Mask) -> $simd {
$simd(v128_bitselect(x.0, y.0, mask.0))
}
#[inline]
fn and(self, x: $simd, y: $simd) -> $simd {
v128_and(x.0, y.0).into()
}
#[inline]
fn not(self, x: $simd) -> $simd {
v128_not(x.0).into()
}
#[inline]
fn or(self, x: $simd, y: $simd) -> $simd {
v128_or(x.0, y.0).into()
}
#[inline]
fn xor(self, x: $simd, y: $simd) -> $simd {
v128_xor(x.0, y.0).into()
}
};
}
unsafe impl NumOps<f32> for Wasm32Isa {
simd_ops_common!(F32x4, M32, i32);
#[inline]
fn add(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_add(x.0, y.0))
}
#[inline]
fn sub(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_mul(x.0, y.0))
}
#[inline]
fn mul_add(self, a: F32x4, b: F32x4, c: F32x4) -> F32x4 {
#[cfg(target_feature = "relaxed-simd")]
{
F32x4(f32x4_relaxed_madd(a.0, b.0, c.0))
}
#[cfg(not(target_feature = "relaxed-simd"))]
{
F32x4(f32x4_add(f32x4_mul(a.0, b.0), c.0))
}
}
#[inline]
fn lt(self, x: F32x4, y: F32x4) -> M32 {
M32(f32x4_lt(x.0, y.0))
}
#[inline]
fn le(self, x: F32x4, y: F32x4) -> M32 {
M32(f32x4_le(x.0, y.0))
}
#[inline]
fn eq(self, x: F32x4, y: F32x4) -> M32 {
M32(f32x4_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: F32x4, y: F32x4) -> M32 {
M32(f32x4_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: F32x4, y: F32x4) -> M32 {
M32(f32x4_gt(x.0, y.0))
}
#[inline]
fn min(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_min(x.0, y.0))
}
#[inline]
fn max(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_max(x.0, y.0))
}
#[inline]
fn splat(self, x: f32) -> F32x4 {
F32x4(f32x4_splat(x))
}
#[inline]
fn sum(self, x: F32x4) -> f32 {
let lo_2 = x.0;
let hi_2 = i32x4_shuffle::<2, 3, 0, 0>(x.0, x.0);
let sum_2 = f32x4_add(lo_2, hi_2);
let lo = sum_2;
let hi = i32x4_shuffle::<1, 0, 0, 0>(sum_2, sum_2);
let sum = f32x4_add(lo, hi);
f32x4_extract_lane::<0>(sum)
}
}
impl FloatOps<f32> for Wasm32Isa {
type Int = <Self as Isa>::I32;
#[inline]
fn div(self, x: F32x4, y: F32x4) -> F32x4 {
F32x4(f32x4_div(x.0, y.0))
}
#[inline]
fn neg(self, x: F32x4) -> F32x4 {
F32x4(f32x4_neg(x.0))
}
#[inline]
fn abs(self, x: F32x4) -> F32x4 {
F32x4(f32x4_abs(x.0))
}
#[inline]
fn mul_sub_from(self, a: F32x4, b: F32x4, c: F32x4) -> F32x4 {
#[cfg(target_feature = "relaxed-simd")]
{
F32x4(f32x4_relaxed_nmadd(a.0, b.0, c.0))
}
#[cfg(not(target_feature = "relaxed-simd"))]
{
F32x4(f32x4_sub(c.0, f32x4_mul(a.0, b.0)))
}
}
#[inline]
fn round_ties_even(self, x: F32x4) -> F32x4 {
F32x4(f32x4_nearest(x.0))
}
#[inline]
fn to_int_trunc(self, x: F32x4) -> Self::Int {
I32x4(i32x4_trunc_sat_f32x4(x.0))
}
#[inline]
fn to_int_round(self, x: F32x4) -> Self::Int {
I32x4(i32x4_trunc_sat_f32x4(f32x4_nearest(x.0)))
}
}
unsafe impl NumOps<i32> for Wasm32Isa {
simd_ops_common!(I32x4, M32, i32);
#[inline]
fn add(self, x: I32x4, y: I32x4) -> I32x4 {
I32x4(i32x4_add(x.0, y.0))
}
#[inline]
fn sub(self, x: I32x4, y: I32x4) -> I32x4 {
I32x4(i32x4_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: I32x4, y: I32x4) -> I32x4 {
I32x4(i32x4_mul(x.0, y.0))
}
#[inline]
fn splat(self, x: i32) -> I32x4 {
I32x4(i32x4_splat(x))
}
#[inline]
fn eq(self, x: I32x4, y: I32x4) -> M32 {
M32(i32x4_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: I32x4, y: I32x4) -> M32 {
M32(i32x4_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: I32x4, y: I32x4) -> M32 {
M32(i32x4_gt(x.0, y.0))
}
}
impl IntOps<i32> for Wasm32Isa {
#[inline]
fn shift_left<const SHIFT: i32>(self, x: I32x4) -> I32x4 {
I32x4(i32x4_shl(x.0, SHIFT as u32))
}
#[inline]
fn shift_right<const SHIFT: i32>(self, x: I32x4) -> I32x4 {
I32x4(i32x4_shr(x.0, SHIFT as u32))
}
}
impl SignedIntOps<i32> for Wasm32Isa {
#[inline]
fn neg(self, x: I32x4) -> I32x4 {
I32x4(i32x4_neg(x.0))
}
}
impl NarrowSaturate<i32, i16> for Wasm32Isa {
type Output = I16x8;
#[inline]
fn narrow_saturate(self, low: I32x4, high: I32x4) -> I16x8 {
I16x8(i16x8_narrow_i32x4(low.0, high.0))
}
}
impl Concat<i32> for Wasm32Isa {
#[inline]
fn concat_low(self, a: I32x4, b: I32x4) -> I32x4 {
I32x4(i32x4_shuffle::<0, 1, 4, 5>(a.0, b.0))
}
#[inline]
fn concat_high(self, a: I32x4, b: I32x4) -> I32x4 {
I32x4(i32x4_shuffle::<2, 3, 6, 7>(a.0, b.0))
}
}
impl ToFloat<i32> for Wasm32Isa {
type Output = F32x4;
#[inline]
fn to_float(self, x: I32x4) -> F32x4 {
F32x4(f32x4_convert_i32x4(x.0))
}
}
unsafe impl NumOps<i16> for Wasm32Isa {
simd_ops_common!(I16x8, M16, i16);
#[inline]
fn add(self, x: I16x8, y: I16x8) -> I16x8 {
I16x8(i16x8_add(x.0, y.0))
}
#[inline]
fn sub(self, x: I16x8, y: I16x8) -> I16x8 {
I16x8(i16x8_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: I16x8, y: I16x8) -> I16x8 {
I16x8(i16x8_mul(x.0, y.0))
}
#[inline]
fn splat(self, x: i16) -> I16x8 {
I16x8(i16x8_splat(x))
}
#[inline]
fn eq(self, x: I16x8, y: I16x8) -> M16 {
M16(i16x8_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: I16x8, y: I16x8) -> M16 {
M16(i16x8_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: I16x8, y: I16x8) -> M16 {
M16(i16x8_gt(x.0, y.0))
}
}
impl IntOps<i16> for Wasm32Isa {
#[inline]
fn shift_left<const SHIFT: i32>(self, x: I16x8) -> I16x8 {
I16x8(i16x8_shl(x.0, SHIFT as u32))
}
#[inline]
fn shift_right<const SHIFT: i32>(self, x: I16x8) -> I16x8 {
I16x8(i16x8_shr(x.0, SHIFT as u32))
}
}
impl SignedIntOps<i16> for Wasm32Isa {
#[inline]
fn neg(self, x: I16x8) -> I16x8 {
I16x8(i16x8_neg(x.0))
}
}
impl Extend<i16> for Wasm32Isa {
type Output = I32x4;
#[inline]
fn extend(self, x: I16x8) -> (I32x4, I32x4) {
let low = i32x4_extend_low_i16x8(x.0);
let high = i32x4_extend_high_i16x8(x.0);
(low.into(), high.into())
}
}
impl Interleave<i16> for Wasm32Isa {
#[inline]
fn interleave_low(self, a: I16x8, b: I16x8) -> I16x8 {
i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a.0, b.0).into()
}
#[inline]
fn interleave_high(self, a: I16x8, b: I16x8) -> I16x8 {
i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a.0, b.0).into()
}
}
impl NarrowSaturate<i16, u8> for Wasm32Isa {
type Output = U8x16;
#[inline]
fn narrow_saturate(self, low: I16x8, high: I16x8) -> U8x16 {
U8x16(u8x16_narrow_i16x8(low.0, high.0))
}
}
unsafe impl NumOps<i8> for Wasm32Isa {
simd_ops_common!(I8x16, M8, i8);
#[inline]
fn add(self, x: I8x16, y: I8x16) -> I8x16 {
I8x16(i8x16_add(x.0, y.0))
}
#[inline]
fn sub(self, x: I8x16, y: I8x16) -> I8x16 {
I8x16(i8x16_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: I8x16, y: I8x16) -> I8x16 {
let prod_low = i16x8_extmul_low_i8x16(x.0, y.0);
let prod_high = i16x8_extmul_high_i8x16(x.0, y.0);
let prod_i8 = i8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
prod_low, prod_high,
);
I8x16(prod_i8)
}
#[inline]
fn splat(self, x: i8) -> I8x16 {
I8x16(i8x16_splat(x))
}
#[inline]
fn eq(self, x: I8x16, y: I8x16) -> M8 {
M8(i8x16_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: I8x16, y: I8x16) -> M8 {
M8(i8x16_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: I8x16, y: I8x16) -> M8 {
M8(i8x16_gt(x.0, y.0))
}
}
impl IntOps<i8> for Wasm32Isa {
#[inline]
fn shift_left<const SHIFT: i32>(self, x: I8x16) -> I8x16 {
I8x16(i8x16_shl(x.0, SHIFT as u32))
}
#[inline]
fn shift_right<const SHIFT: i32>(self, x: I8x16) -> I8x16 {
I8x16(i8x16_shr(x.0, SHIFT as u32))
}
}
impl SignedIntOps<i8> for Wasm32Isa {
#[inline]
fn neg(self, x: I8x16) -> I8x16 {
I8x16(i8x16_neg(x.0))
}
}
impl Extend<i8> for Wasm32Isa {
type Output = I16x8;
#[inline]
fn extend(self, x: I8x16) -> (I16x8, I16x8) {
let low = i16x8_extend_low_i8x16(x.0);
let high = i16x8_extend_high_i8x16(x.0);
(low.into(), high.into())
}
}
impl Interleave<i8> for Wasm32Isa {
#[inline]
fn interleave_low(self, a: I8x16, b: I8x16) -> I8x16 {
i8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a.0, b.0).into()
}
#[inline]
fn interleave_high(self, a: I8x16, b: I8x16) -> I8x16 {
i8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(a.0, b.0)
.into()
}
}
unsafe impl NumOps<u8> for Wasm32Isa {
simd_ops_common!(U8x16, M8, i8);
#[inline]
fn add(self, x: U8x16, y: U8x16) -> U8x16 {
U8x16(u8x16_add(x.0, y.0))
}
#[inline]
fn sub(self, x: U8x16, y: U8x16) -> U8x16 {
U8x16(u8x16_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: U8x16, y: U8x16) -> U8x16 {
let prod_low = u16x8_extmul_low_u8x16(x.0, y.0);
let prod_high = u16x8_extmul_high_u8x16(x.0, y.0);
let prod_u8 = u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
prod_low, prod_high,
);
U8x16(prod_u8)
}
#[inline]
fn splat(self, x: u8) -> U8x16 {
U8x16(u8x16_splat(x))
}
#[inline]
fn eq(self, x: U8x16, y: U8x16) -> M8 {
M8(u8x16_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: U8x16, y: U8x16) -> M8 {
M8(u8x16_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: U8x16, y: U8x16) -> M8 {
M8(u8x16_gt(x.0, y.0))
}
}
unsafe impl NumOps<u16> for Wasm32Isa {
simd_ops_common!(U16x8, M16, u16);
#[inline]
fn add(self, x: U16x8, y: U16x8) -> U16x8 {
U16x8(u16x8_add(x.0, y.0))
}
#[inline]
fn sub(self, x: U16x8, y: U16x8) -> U16x8 {
U16x8(u16x8_sub(x.0, y.0))
}
#[inline]
fn mul(self, x: U16x8, y: U16x8) -> U16x8 {
U16x8(u16x8_mul(x.0, y.0))
}
#[inline]
fn splat(self, x: u16) -> U16x8 {
U16x8(u16x8_splat(x))
}
#[inline]
fn eq(self, x: U16x8, y: U16x8) -> M16 {
M16(u16x8_eq(x.0, y.0))
}
#[inline]
fn ge(self, x: U16x8, y: U16x8) -> M16 {
M16(u16x8_ge(x.0, y.0))
}
#[inline]
fn gt(self, x: U16x8, y: U16x8) -> M16 {
M16(u16x8_gt(x.0, y.0))
}
}
impl IntOps<u16> for Wasm32Isa {
#[inline]
fn shift_left<const SHIFT: i32>(self, x: U16x8) -> U16x8 {
U16x8(u16x8_shl(x.0, SHIFT as u32))
}
#[inline]
fn shift_right<const SHIFT: i32>(self, x: U16x8) -> U16x8 {
U16x8(u16x8_shr(x.0, SHIFT as u32))
}
}
impl Extend<u8> for Wasm32Isa {
type Output = U16x8;
#[inline]
fn extend(self, x: U8x16) -> (U16x8, U16x8) {
let low = u16x8_extend_low_u8x16(x.0);
let high = u16x8_extend_high_u8x16(x.0);
(low.into(), high.into())
}
}
impl IntOps<u8> for Wasm32Isa {
#[inline]
fn shift_left<const SHIFT: i32>(self, x: U8x16) -> U8x16 {
U8x16(u8x16_shl(x.0, SHIFT as u32))
}
#[inline]
fn shift_right<const SHIFT: i32>(self, x: U8x16) -> U8x16 {
U8x16(u8x16_shr(x.0, SHIFT as u32))
}
}
impl Interleave<u8> for Wasm32Isa {
#[inline]
fn interleave_low(self, a: U8x16, b: U8x16) -> U8x16 {
u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a.0, b.0).into()
}
#[inline]
fn interleave_high(self, a: U8x16, b: U8x16) -> U8x16 {
u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(a.0, b.0)
.into()
}
}
macro_rules! mask_type {
($mask:ident, $elem:ty, $len: expr, $all_true_fn:ident) => {
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct $mask(v128);
impl Mask for $mask {
type Array = [bool; $len];
#[inline]
fn to_array(self) -> Self::Array {
let array = unsafe { transmute::<Self, [$elem; $len]>(self) };
std::array::from_fn(|i| array[i] != 0)
}
}
unsafe impl MaskOps<$mask> for Wasm32Isa {
#[inline]
fn and(self, x: $mask, y: $mask) -> $mask {
$mask(v128_and(x.0, y.0))
}
#[inline]
fn any(self, x: $mask) -> bool {
v128_any_true(x.0)
}
#[inline]
fn all(self, x: $mask) -> bool {
$all_true_fn(x.0)
}
}
};
}
mask_type!(M32, i32, 4, i32x4_all_true);
mask_type!(M16, i16, 8, i16x8_all_true);
mask_type!(M8, i8, 16, i8x16_all_true);