Skip to main content

wasmi_core/
simd.rs

1//! Defines the entire Wasm `simd` proposal API.
2
3use crate::{
4    memory::{self, ExtendInto},
5    simd,
6    value::Float,
7    wasm,
8    TrapCode,
9    V128,
10};
11use core::{
12    array,
13    ops::{BitAnd, BitOr, BitXor, Neg, Not},
14};
15
/// Expands to a closure that applies the binary operator `$op` to two `$ty` operands.
///
/// For example `op!(i32, +)` expands to `|lhs: i32, rhs: i32| lhs + rhs`.
macro_rules! op {
    ($ty:ty, $op:tt) => {{
        |lhs: $ty, rhs: $ty| lhs $op rhs
    }};
}
21
/// An error that may occur when constructing an out of bounds lane index.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct OutOfBoundsLaneIdx;
24
25/// Helper trait used to infer the [`ImmLaneIdx`] from a given primitive.
26pub trait IntoLaneIdx {
27    /// The associated lane index type.
28    type LaneIdx: Sized + Copy + TryFrom<u8, Error = OutOfBoundsLaneIdx> + Into<u8>;
29}
30
31macro_rules! impl_into_lane_idx {
32    (
33        $( impl IntoLaneIdx for $ty:ty = $lane_idx:ty; )*
34    ) => {
35        $(
36            impl IntoLaneIdx for $ty {
37                type LaneIdx = $lane_idx;
38            }
39        )*
40    };
41}
42impl_into_lane_idx! {
43    impl IntoLaneIdx for i8 = ImmLaneIdx<16>;
44    impl IntoLaneIdx for u8 = ImmLaneIdx<16>;
45    impl IntoLaneIdx for i16 = ImmLaneIdx<8>;
46    impl IntoLaneIdx for u16 = ImmLaneIdx<8>;
47    impl IntoLaneIdx for i32 = ImmLaneIdx<4>;
48    impl IntoLaneIdx for u32 = ImmLaneIdx<4>;
49    impl IntoLaneIdx for f32 = ImmLaneIdx<4>;
50    impl IntoLaneIdx for i64 = ImmLaneIdx<2>;
51    impl IntoLaneIdx for u64 = ImmLaneIdx<2>;
52    impl IntoLaneIdx for f64 = ImmLaneIdx<2>;
53}
54
/// A byte identifying one of the `N` lanes of a vector, i.e. a value in the range `0..N`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct ImmLaneIdx<const N: u8>(u8);
58
59impl<const N: u8> ImmLaneIdx<N> {
60    /// Helper bit mask for construction and getter.
61    const MASK: u8 = (1_u8 << u8::ilog2(N)) - 1;
62
63    fn zero() -> Self {
64        Self(0)
65    }
66}
67
68impl<const N: u8> From<ImmLaneIdx<N>> for u8 {
69    fn from(lane: ImmLaneIdx<N>) -> u8 {
70        lane.0 & <ImmLaneIdx<N>>::MASK
71    }
72}
73
74impl<const N: u8> TryFrom<u8> for ImmLaneIdx<N> {
75    type Error = OutOfBoundsLaneIdx;
76
77    fn try_from(lane: u8) -> Result<Self, Self::Error> {
78        if lane > Self::MASK {
79            return Err(OutOfBoundsLaneIdx);
80        }
81        Ok(Self(lane))
82    }
83}
84
85/// A byte with values in the range 0–1 identifying a lane.
86pub type ImmLaneIdx2 = ImmLaneIdx<2>;
87/// A byte with values in the range 0–3 identifying a lane.
88pub type ImmLaneIdx4 = ImmLaneIdx<4>;
89/// A byte with values in the range 0–7 identifying a lane.
90pub type ImmLaneIdx8 = ImmLaneIdx<8>;
91/// A byte with values in the range 0–15 identifying a lane.
92pub type ImmLaneIdx16 = ImmLaneIdx<16>;
93/// A byte with values in the range 0–31 identifying a lane.
94pub type ImmLaneIdx32 = ImmLaneIdx<32>;
95
96/// Internal helper trait to help the type inference to do its jobs with fewer type annotations.
97///
98/// # Note
99///
100/// - This trait and its applications are hidden from outside this module.
101/// - For example `i32` is associated to the `i32x4` lane type.
102trait IntoLanes {
103    /// The `Lanes` type associated to the implementing type.
104    type Lanes: Lanes<Item = Self, LaneIdx = Self::LaneIdx>;
105    /// The `LaneIdx` type associated to the implementing type.
106    type LaneIdx;
107}
108
109/// Internal helper trait implemented by `Lanes` types.
110///
111/// Possible `Lanes` types include:
112///
113/// - `I64x2`
114/// - `I32x4`
115/// - `I16x8`
116/// - `I8x16`
117/// - `F64x2`
118/// - `F32x4`
119trait Lanes {
120    /// The type used in the lanes. E.g. `i32` for `i32x4`.
121    type Item;
122    /// The associated lane index type. E.g. `ImmLaneIdx4` for `i32x4`.
123    type LaneIdx;
124
125    /// The number of lanes for `Self`.
126    const LANES: usize;
127
128    /// A lane item where all bits are `1`.
129    const ALL_ONES: Self::Item;
130
131    /// A lane item where all bits are `0`.
132    const ALL_ZEROS: Self::Item;
133
134    /// Converts the [`V128`] to `Self`.
135    fn from_v128(value: V128) -> Self;
136
137    /// Converts `self` to a [`V128`] value.
138    fn into_v128(self) -> V128;
139
140    /// Creates `Self` by splatting `value`.
141    fn splat(value: Self::Item) -> Self;
142
143    /// Extract the item at `lane` from `self`.
144    fn extract_lane(self, lane: Self::LaneIdx) -> Self::Item;
145
146    /// Replace the item at `lane` with `item` and return `self` afterwards.
147    fn replace_lane(self, lane: Self::LaneIdx, item: Self::Item) -> Self;
148
149    /// Apply `f` for all lane items in `self`.
150    fn lanewise_unary(self, f: impl Fn(Self::Item) -> Self::Item) -> Self;
151
152    /// Apply `f` for all pairs of lane items in `self` and `other`.
153    fn lanewise_binary(self, other: Self, f: impl Fn(Self::Item, Self::Item) -> Self::Item)
154        -> Self;
155
156    /// Apply `f` for all triplets of lane items in `self` and `other`.
157    fn lanewise_ternary(
158        self,
159        b: Self,
160        c: Self,
161        f: impl Fn(Self::Item, Self::Item, Self::Item) -> Self::Item,
162    ) -> Self;
163
164    /// Apply `f` comparison for all pairs of lane items in `self` and `other`.
165    ///
166    /// Storing [`Self::ALL_ONES`] if `f` evaluates to `true` or [`Self::ALL_ZEROS`] otherwise per item.
167    fn lanewise_comparison(self, other: Self, f: impl Fn(Self::Item, Self::Item) -> bool) -> Self;
168
169    /// Apply `f(i, n, acc)` for all lane items `i` at pos `n` in `self` and return the result.
170    fn lanewise_reduce<T>(self, acc: T, f: impl Fn(u8, Self::Item, T) -> T) -> T;
171}
172
173macro_rules! impl_lanes_for {
174    (
175        $(
176            $( #[$attr:meta] )*
177            struct $name:ident([$ty:ty; $n:literal]);
178        )*
179    ) => {
180        $(
181            $( #[$attr] )*
182            #[derive(Copy, Clone)]
183            #[repr(transparent)]
184            struct $name([$ty; $n]);
185
186            impl IntoLanes for $ty {
187                type Lanes = $name;
188                type LaneIdx = ImmLaneIdx<$n>;
189            }
190
191            impl From<[$ty; $n]> for $name {
192                fn from(array: [$ty; $n]) -> Self {
193                    Self(array)
194                }
195            }
196
197            impl Lanes for $name {
198                type Item = $ty;
199                type LaneIdx = ImmLaneIdx<$n>;
200
201                const LANES: usize = $n;
202                const ALL_ONES: Self::Item = <$ty>::from_le_bytes([0xFF_u8; 16 / $n]);
203                const ALL_ZEROS: Self::Item = <$ty>::from_le_bytes([0x00_u8; 16 / $n]);
204
205                fn from_v128(value: V128) -> Self {
206                    // SAFETY: the types chosen to implement `Split` are always
207                    //         of same size as `V128` and have no invalid bit-patterns.
208                    //
209                    // Note: it is important to state that this could be implemented
210                    //       in safe Rust entirely. However, during development it turned
211                    //       out that _not_ using unsafe transmutation confused the
212                    //       optimizer enough that optimizations became very flaky.
213                    //       This was tested across a variety of compiler versions.
214                    Self(unsafe { ::core::mem::transmute::<V128, [$ty; $n]>(value) })
215                }
216
217                fn into_v128(self) -> V128 {
218                    // SAFETY: the types chosen to implement `Combine` are always
219                    //         of same size as `V128` and have no invalid bit-patterns.
220                    //
221                    // Note: see note from `from_v128` method above.
222                    unsafe { ::core::mem::transmute::<[$ty; $n], V128>(self.0) }
223                }
224
225                fn splat(value: Self::Item) -> Self {
226                    Self([value; $n])
227                }
228
229                fn extract_lane(self, lane: Self::LaneIdx) -> Self::Item {
230                    self.0[u8::from(lane) as usize]
231                }
232
233                fn replace_lane(self, lane: Self::LaneIdx, item: Self::Item) -> Self {
234                    let mut this = self;
235                    this.0[u8::from(lane) as usize] = item;
236                    this
237                }
238
239                fn lanewise_unary(self, f: impl Fn(Self::Item) -> Self::Item) -> Self {
240                    let mut this = self.0;
241                    for i in 0..Self::LANES {
242                        this[i] = f(this[i]);
243                    }
244                    Self(this)
245                }
246
247                fn lanewise_binary(self, other: Self, f: impl Fn(Self::Item, Self::Item) -> Self::Item) -> Self {
248                    let mut lhs = self.0;
249                    let rhs = other.0;
250                    for i in 0..Self::LANES {
251                        lhs[i] = f(lhs[i], rhs[i]);
252                    }
253                    Self(lhs)
254                }
255
256                fn lanewise_ternary(self, b: Self, c: Self, f: impl Fn(Self::Item, Self::Item, Self::Item) -> Self::Item) -> Self {
257                    let mut a = self.0;
258                    let b = b.0;
259                    let c = c.0;
260                    for i in 0..Self::LANES {
261                        a[i] = f(a[i], b[i], c[i]);
262                    }
263                    Self(a)
264                }
265
266                fn lanewise_comparison(self, other: Self, f: impl Fn(Self::Item, Self::Item) -> bool) -> Self {
267                    self.lanewise_binary(other, |lhs, rhs| match f(lhs, rhs) {
268                        true => Self::ALL_ONES,
269                        false => Self::ALL_ZEROS,
270                    })
271                }
272
273                fn lanewise_reduce<T>(self, acc: T, f: impl Fn(u8, Self::Item, T) -> T) -> T {
274                    let this = self.0;
275                    let mut acc = acc;
276                    for i in 0..Self::LANES {
277                        acc = f(i as u8, this[i], acc);
278                    }
279                    acc
280                }
281            }
282        )*
283    };
284}
285impl_lanes_for! {
286    /// The Wasm `i64x2` vector type consisting of 2 `i64` values.
287    struct I64x2([i64; 2]);
288    /// The Wasm `u64x2` vector type consisting of 2 `u64` values.
289    struct U64x2([u64; 2]);
290    /// The Wasm `i32x4` vector type consisting of 4 `i32` values.
291    struct I32x4([i32; 4]);
292    /// The Wasm `u32x4` vector type consisting of 4 `u32` values.
293    struct U32x4([u32; 4]);
294    /// The Wasm `i16x8` vector type consisting of 8 `i16` values.
295    struct I16x8([i16; 8]);
296    /// The Wasm `u16x8` vector type consisting of 8 `u16` values.
297    struct U16x8([u16; 8]);
298    /// The Wasm `i8x16` vector type consisting of 16 `i8` values.
299    struct I8x16([i8; 16]);
300    /// The Wasm `u8x16` vector type consisting of 16 `u8` values.
301    struct U8x16([u8; 16]);
302    /// The Wasm `f32x4` vector type consisting of 4 `f32` values.
303    struct F32x4([f32; 4]);
304    /// The Wasm `f64x2` vector type consisting of 2 `f64` values.
305    struct F64x2([f64; 2]);
306}
307
308/// `Self` can be constructed from the narrower lanes.
309///
310/// For example a `i64x2` vector can be constructed from the two lower lanes of a `i32x4`.
311trait FromNarrow<NarrowLanes: Lanes>: Lanes {
312    /// Construct `Self` from the pairwise application of `f` of items in `narrow`.
313    fn pairwise_unary(
314        narrow: NarrowLanes,
315        f: impl Fn(NarrowLanes::Item, NarrowLanes::Item) -> Self::Item,
316    ) -> Self;
317
318    /// Construct `Self` from the pairwise application of `f` of items in `lhs` and `rhs`.
319    fn pairwise_binary(
320        lhs: NarrowLanes,
321        rhs: NarrowLanes,
322        f: impl Fn([NarrowLanes::Item; 2], [NarrowLanes::Item; 2]) -> Self::Item,
323    ) -> Self;
324
325    /// Construct `Self` from the application of `f` to the lower half lanes of `narrow`.
326    fn low_unary(narrow: NarrowLanes, f: impl Fn(NarrowLanes::Item) -> Self::Item) -> Self;
327
328    /// Construct `Self` from the application of `f` to the higher half lanes of `narrow`.
329    fn high_unary(narrow: NarrowLanes, f: impl Fn(NarrowLanes::Item) -> Self::Item) -> Self;
330
331    /// Construct `Self` from the binary application of `f` to the lower half lanes of `narrow_lhs` and `narrow_rhs`.
332    fn low_binary(
333        narrow_lhs: NarrowLanes,
334        narrow_rhs: NarrowLanes,
335        f: impl Fn(NarrowLanes::Item, NarrowLanes::Item) -> Self::Item,
336    ) -> Self;
337
338    /// Construct `Self` from the binary application of `f` to the higher half lanes of `narrow_lhs` and `narrow_rhs`.
339    fn high_binary(
340        narrow_lhs: NarrowLanes,
341        narrow_rhs: NarrowLanes,
342        f: impl Fn(NarrowLanes::Item, NarrowLanes::Item) -> Self::Item,
343    ) -> Self;
344}
345
346macro_rules! impl_from_narrow_for {
347    ( $( impl FromNarrow<$narrow_ty:ty> for $self_ty:ty; )* ) => {
348        $(
349            impl FromNarrow<$narrow_ty> for $self_ty {
350                fn pairwise_unary(
351                    narrow: $narrow_ty,
352                    f: impl Fn(<$narrow_ty as Lanes>::Item, <$narrow_ty as Lanes>::Item) -> Self::Item,
353                ) -> Self {
354                    let narrow = narrow.0;
355                    Self(array::from_fn(|i| f(narrow[2 * i], narrow[2 * i + 1])))
356                }
357
358                fn pairwise_binary(
359                    lhs: $narrow_ty,
360                    rhs: $narrow_ty,
361                    f: impl Fn([<$narrow_ty as Lanes>::Item; 2], [<$narrow_ty as Lanes>::Item; 2]) -> Self::Item,
362                ) -> Self {
363                    let lhs = lhs.0;
364                    let rhs = rhs.0;
365                    Self(array::from_fn(|i| {
366                        f(
367                            [lhs[2 * i], lhs[2 * i + 1]],
368                            [rhs[2 * i], rhs[2 * i + 1]],
369                        )
370                    }))
371                }
372
373                fn low_unary(narrow: $narrow_ty, f: impl Fn(<$narrow_ty as Lanes>::Item) -> Self::Item) -> Self {
374                    Self(array::from_fn(|i| f(narrow.0[i])))
375                }
376
377                fn high_unary(narrow: $narrow_ty, f: impl Fn(<$narrow_ty as Lanes>::Item) -> Self::Item) -> Self {
378                    Self(array::from_fn(|i| f(narrow.0[i + Self::LANES])))
379                }
380
381                fn low_binary(
382                    narrow_lhs: $narrow_ty,
383                    narrow_rhs: $narrow_ty,
384                    f: impl Fn(<$narrow_ty as Lanes>::Item, <$narrow_ty as Lanes>::Item) -> Self::Item,
385                ) -> Self {
386                    let narrow_lhs = narrow_lhs.0;
387                    let narrow_rhs = narrow_rhs.0;
388                    Self(array::from_fn(|i| f(narrow_lhs[i], narrow_rhs[i])))
389                }
390
391                fn high_binary(
392                    narrow_lhs: $narrow_ty,
393                    narrow_rhs: $narrow_ty,
394                    f: impl Fn(<$narrow_ty as Lanes>::Item, <$narrow_ty as Lanes>::Item) -> Self::Item,
395                ) -> Self {
396                    let narrow_lhs = narrow_lhs.0;
397                    let narrow_rhs = narrow_rhs.0;
398                    Self(array::from_fn(|i| {
399                        f(narrow_lhs[i + Self::LANES], narrow_rhs[i + Self::LANES])
400                    }))
401                }
402            }
403        )*
404    };
405}
406impl_from_narrow_for! {
407    impl FromNarrow<I32x4> for I64x2;
408    impl FromNarrow<U32x4> for U64x2;
409    impl FromNarrow<I16x8> for I32x4;
410    impl FromNarrow<U16x8> for U32x4;
411    impl FromNarrow<I8x16> for I16x8;
412    impl FromNarrow<U8x16> for U16x8;
413    impl FromNarrow<F32x4> for I64x2;
414    impl FromNarrow<F32x4> for U64x2;
415    impl FromNarrow<I32x4> for F64x2;
416    impl FromNarrow<U32x4> for F64x2;
417    impl FromNarrow<F32x4> for F64x2;
418}
419
420/// `Self` can be constructed from the wider lanes.
421///
422/// For example a `i32x4` vector can be constructed from a `i64x2`.
423trait FromWide<WideLanes: Lanes>: Lanes {
424    /// Construct `Self` from the application of `f` to the wide `low` and `high` items.
425    fn from_low_high(
426        low: WideLanes,
427        high: WideLanes,
428        f: impl Fn(WideLanes::Item) -> Self::Item,
429    ) -> Self;
430
431    /// Construct `Self` from the application of `f` to the wide `low` or evaluate `high`.
432    fn from_low_or(
433        low: WideLanes,
434        high: impl Fn() -> Self::Item,
435        f: impl Fn(WideLanes::Item) -> Self::Item,
436    ) -> Self;
437}
438
439macro_rules! impl_from_wide_for {
440    (
441        $( impl FromWide<$wide_ty:ty> for $narrow_ty:ty; )*
442    ) => {
443        $(
444            impl FromWide<$wide_ty> for $narrow_ty {
445                fn from_low_high(
446                    low: $wide_ty,
447                    high: $wide_ty,
448                    f: impl Fn(<$wide_ty as Lanes>::Item) -> Self::Item,
449                ) -> Self {
450                    let low = low.0;
451                    let high = high.0;
452                    Self(array::from_fn(|i| {
453                        match i < <$wide_ty as Lanes>::LANES {
454                            true => f(low[i]),
455                            false => f(high[i - <$wide_ty as Lanes>::LANES]),
456                        }
457                    }))
458                }
459
460                fn from_low_or(
461                    low: $wide_ty,
462                    high: impl Fn() -> Self::Item,
463                    f: impl Fn(<$wide_ty as Lanes>::Item) -> Self::Item,
464                ) -> Self {
465                    let low = low.0;
466                    Self(array::from_fn(|i| {
467                        match i < <$wide_ty as Lanes>::LANES {
468                            true => f(low[i]),
469                            false => high(),
470                        }
471                    }))
472                }
473            }
474        )*
475    };
476}
477impl_from_wide_for! {
478    impl FromWide<F64x2> for I32x4;
479    impl FromWide<F64x2> for U32x4;
480    impl FromWide<F64x2> for F32x4;
481    impl FromWide<I32x4> for I16x8;
482    impl FromWide<U32x4> for U16x8;
483    impl FromWide<I16x8> for I8x16;
484    impl FromWide<U16x8> for U8x16;
485}
486
/// Conversion into `T` that keeps the bytes of `self` unchanged.
trait ReinterpretAs<T> {
    /// Returns the value of type `T` that has the same bytes as `self`.
    fn reinterpret_as(self) -> T;
}

/// Implements [`ReinterpretAs`] in both directions between two same-sized primitives.
macro_rules! impl_reinterpret_as_for {
    ( $ty0:ty, $ty1:ty ) => {
        impl ReinterpretAs<$ty0> for $ty1 {
            fn reinterpret_as(self) -> $ty0 {
                <$ty0>::from_ne_bytes(self.to_ne_bytes())
            }
        }

        impl ReinterpretAs<$ty1> for $ty0 {
            fn reinterpret_as(self) -> $ty1 {
                <$ty1>::from_ne_bytes(self.to_ne_bytes())
            }
        }
    };
}
impl_reinterpret_as_for!(i32, f32);
impl_reinterpret_as_for!(u32, f32);
impl_reinterpret_as_for!(i64, f64);
impl_reinterpret_as_for!(u64, f64);
510
511impl V128 {
512    /// Convenience method to help implement splatting methods.
513    fn splat<T: IntoLanes>(value: T) -> Self {
514        <<T as IntoLanes>::Lanes>::splat(value).into_v128()
515    }
516
517    /// Convenience method to help implement lane extraction methods.
518    fn extract_lane<T: IntoLanes>(self, lane: <T as IntoLanes>::LaneIdx) -> T {
519        <<T as IntoLanes>::Lanes>::from_v128(self).extract_lane(lane)
520    }
521
522    /// Convenience method to help implement lane replacement methods.
523    fn replace_lane<T: IntoLanes>(self, lane: <T as IntoLanes>::LaneIdx, item: T) -> Self {
524        <<T as IntoLanes>::Lanes>::from_v128(self)
525            .replace_lane(lane, item)
526            .into_v128()
527    }
528
529    /// Convenience method to help implement lanewise unary methods.
530    fn lanewise_unary<T: IntoLanes>(self, f: impl Fn(T) -> T) -> Self {
531        <<T as IntoLanes>::Lanes>::from_v128(self)
532            .lanewise_unary(f)
533            .into_v128()
534    }
535
536    /// Convenience method to help implement lanewise unary cast methods.
537    fn lanewise_unary_cast<T: IntoLanes, U>(self, f: impl Fn(T) -> U) -> Self
538    where
539        U: ReinterpretAs<T>,
540    {
541        <<T as IntoLanes>::Lanes>::from_v128(self)
542            .lanewise_unary(|v| f(v).reinterpret_as())
543            .into_v128()
544    }
545
546    /// Convenience method to help implement lanewise binary methods.
547    fn lanewise_binary<T: IntoLanes>(lhs: Self, rhs: Self, f: impl Fn(T, T) -> T) -> Self {
548        let lhs = <<T as IntoLanes>::Lanes>::from_v128(lhs);
549        let rhs = <<T as IntoLanes>::Lanes>::from_v128(rhs);
550        lhs.lanewise_binary(rhs, f).into_v128()
551    }
552
553    /// Convenience method to help implement lanewise ternary methods.
554    fn lanewise_ternary<T: IntoLanes>(a: Self, b: Self, c: Self, f: impl Fn(T, T, T) -> T) -> Self {
555        let a = <<T as IntoLanes>::Lanes>::from_v128(a);
556        let b = <<T as IntoLanes>::Lanes>::from_v128(b);
557        let c = <<T as IntoLanes>::Lanes>::from_v128(c);
558        a.lanewise_ternary(b, c, f).into_v128()
559    }
560
561    /// Convenience method to help implement lanewise comparison methods.
562    fn lanewise_comparison<T: IntoLanes>(lhs: Self, rhs: Self, f: impl Fn(T, T) -> bool) -> Self {
563        let lhs = <<T as IntoLanes>::Lanes>::from_v128(lhs);
564        let rhs = <<T as IntoLanes>::Lanes>::from_v128(rhs);
565        lhs.lanewise_comparison(rhs, f).into_v128()
566    }
567
568    /// Convenience method to help implement lanewise reduce methods.
569    fn lanewise_reduce<T: IntoLanes, V>(self, acc: V, f: impl Fn(T, V) -> V) -> V {
570        self.lanewise_reduce_enumerate::<T, V>(acc, |_, v: T, acc: V| f(v, acc))
571    }
572
573    /// Convenience method to help implement lanewise reduce methods with a loop-index.
574    fn lanewise_reduce_enumerate<T: IntoLanes, V>(self, acc: V, f: impl Fn(u8, T, V) -> V) -> V {
575        <<T as IntoLanes>::Lanes>::from_v128(self).lanewise_reduce(acc, f)
576    }
577
578    /// Convenience method to help implement pairwise unary methods.
579    fn pairwise_unary<Narrow: IntoLanes, Wide: IntoLanes>(
580        self,
581        f: impl Fn(Narrow, Narrow) -> Wide,
582    ) -> Self
583    where
584        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
585    {
586        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::pairwise_unary(
587            <<Narrow as IntoLanes>::Lanes>::from_v128(self),
588            f,
589        )
590        .into_v128()
591    }
592
593    /// Convenience method to help implement pairwise binary methods.
594    fn pairwise_binary<Narrow: IntoLanes, Wide: IntoLanes>(
595        lhs: Self,
596        rhs: Self,
597        f: impl Fn([Narrow; 2], [Narrow; 2]) -> Wide,
598    ) -> Self
599    where
600        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
601    {
602        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::pairwise_binary(
603            <<Narrow as IntoLanes>::Lanes>::from_v128(lhs),
604            <<Narrow as IntoLanes>::Lanes>::from_v128(rhs),
605            f,
606        )
607        .into_v128()
608    }
609
610    /// Convenience method to help implement extend-low unary methods.
611    fn low_unary<Narrow: IntoLanes, Wide: IntoLanes>(self, f: impl Fn(Narrow) -> Wide) -> Self
612    where
613        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
614    {
615        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::low_unary(
616            <<Narrow as IntoLanes>::Lanes>::from_v128(self),
617            f,
618        )
619        .into_v128()
620    }
621
622    /// Convenience method to help implement extend-high unary methods.
623    fn high_unary<Narrow: IntoLanes, Wide: IntoLanes>(self, f: impl Fn(Narrow) -> Wide) -> Self
624    where
625        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
626    {
627        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::high_unary(
628            <<Narrow as IntoLanes>::Lanes>::from_v128(self),
629            f,
630        )
631        .into_v128()
632    }
633
634    /// Convenience method to help implement extend-low binary methods.
635    fn from_low_binary<Narrow: IntoLanes, Wide: IntoLanes>(
636        lhs: Self,
637        rhs: Self,
638        f: impl Fn(Narrow, Narrow) -> Wide,
639    ) -> Self
640    where
641        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
642    {
643        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::low_binary(
644            <<Narrow as IntoLanes>::Lanes>::from_v128(lhs),
645            <<Narrow as IntoLanes>::Lanes>::from_v128(rhs),
646            f,
647        )
648        .into_v128()
649    }
650
651    /// Convenience method to help implement extend-high binary methods.
652    fn from_high_binary<Narrow: IntoLanes, Wide: IntoLanes>(
653        lhs: Self,
654        rhs: Self,
655        f: impl Fn(Narrow, Narrow) -> Wide,
656    ) -> Self
657    where
658        <Wide as IntoLanes>::Lanes: FromNarrow<<Narrow as IntoLanes>::Lanes>,
659    {
660        <<Wide as IntoLanes>::Lanes as FromNarrow<<Narrow as IntoLanes>::Lanes>>::high_binary(
661            <<Narrow as IntoLanes>::Lanes>::from_v128(lhs),
662            <<Narrow as IntoLanes>::Lanes>::from_v128(rhs),
663            f,
664        )
665        .into_v128()
666    }
667
668    /// Convenience method to help implement narrowing low-high methods.
669    fn from_low_high<Narrow: IntoLanes, Wide: IntoLanes>(
670        lhs: Self,
671        rhs: Self,
672        f: impl Fn(Wide) -> Narrow,
673    ) -> Self
674    where
675        <Narrow as IntoLanes>::Lanes: FromWide<<Wide as IntoLanes>::Lanes>,
676    {
677        <<Narrow as IntoLanes>::Lanes as FromWide<<Wide as IntoLanes>::Lanes>>::from_low_high(
678            <<Wide as IntoLanes>::Lanes>::from_v128(lhs),
679            <<Wide as IntoLanes>::Lanes>::from_v128(rhs),
680            f,
681        )
682        .into_v128()
683    }
684
685    /// Convenience method to help implement narrowing low-or methods.
686    fn low_or<Narrow: IntoLanes, Wide: IntoLanes>(
687        self,
688        high: impl Fn() -> Narrow,
689        f: impl Fn(Wide) -> Narrow,
690    ) -> Self
691    where
692        <Narrow as IntoLanes>::Lanes: FromWide<<Wide as IntoLanes>::Lanes>,
693    {
694        <<Narrow as IntoLanes>::Lanes as FromWide<<Wide as IntoLanes>::Lanes>>::from_low_or(
695            <<Wide as IntoLanes>::Lanes>::from_v128(self),
696            high,
697            f,
698        )
699        .into_v128()
700    }
701}
702
/// Convenience identity helper function.
///
/// Returns `x` unchanged.
fn identity<T>(x: T) -> T {
    x
}
707
708macro_rules! impl_splat_for {
709    ( $( fn $name:ident(value: $ty:ty) -> V128; )* ) => {
710        $(
711            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
712            pub fn $name(value: $ty) -> V128 {
713                V128::splat(value)
714            }
715        )*
716    };
717}
718impl_splat_for! {
719    fn i64x2_splat(value: i64) -> V128;
720    fn i32x4_splat(value: i32) -> V128;
721    fn i16x8_splat(value: i16) -> V128;
722    fn i8x16_splat(value: i8) -> V128;
723    fn f32x4_splat(value: f32) -> V128;
724    fn f64x2_splat(value: f64) -> V128;
725}
726
727macro_rules! impl_extract_for {
728    ( $( fn $name:ident(v128: V128, lane: $lane_ty:ty) -> $ret_ty:ty = $convert:expr; )* ) => {
729        $(
730            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
731            pub fn $name(v128: V128, lane: $lane_ty) -> $ret_ty {
732                ($convert)(v128.extract_lane(lane))
733            }
734        )*
735    };
736}
737impl_extract_for! {
738    fn i64x2_extract_lane(v128: V128, lane: ImmLaneIdx2) -> i64 = identity;
739    fn i32x4_extract_lane(v128: V128, lane: ImmLaneIdx4) -> i32 = identity;
740    fn f64x2_extract_lane(v128: V128, lane: ImmLaneIdx2) -> f64 = identity;
741    fn f32x4_extract_lane(v128: V128, lane: ImmLaneIdx4) -> f32 = identity;
742    fn i8x16_extract_lane_s(v128: V128, lane: ImmLaneIdx16) -> i32 = <i8 as Into<_>>::into;
743    fn i8x16_extract_lane_u(v128: V128, lane: ImmLaneIdx16) -> u32 = <u8 as Into<_>>::into;
744    fn i16x8_extract_lane_s(v128: V128, lane: ImmLaneIdx8) -> i32 = <i16 as Into<_>>::into;
745    fn i16x8_extract_lane_u(v128: V128, lane: ImmLaneIdx8) -> u32 = <u16 as Into<_>>::into;
746}
747
748macro_rules! impl_replace_for {
749    ( $( fn $name:ident(v128: V128, lane: $lane_ty:ty, item: $item_ty:ty) -> V128; )* ) => {
750        $(
751            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
752            pub fn $name(v128: V128, lane: $lane_ty, item: $item_ty) -> V128 {
753                v128.replace_lane(lane, item)
754            }
755        )*
756    };
757}
758impl_replace_for! {
759    fn i64x2_replace_lane(v128: V128, lane: ImmLaneIdx2, item: i64) -> V128;
760    fn i32x4_replace_lane(v128: V128, lane: ImmLaneIdx4, item: i32) -> V128;
761    fn i16x8_replace_lane(v128: V128, lane: ImmLaneIdx8, item: i16) -> V128;
762    fn i8x16_replace_lane(v128: V128, lane: ImmLaneIdx16, item: i8) -> V128;
763    fn f64x2_replace_lane(v128: V128, lane: ImmLaneIdx2, item: f64) -> V128;
764    fn f32x4_replace_lane(v128: V128, lane: ImmLaneIdx4, item: f32) -> V128;
765}
766
767/// Executes a Wasm `i8x16.shuffle` instruction.
768pub fn i8x16_shuffle(a: V128, b: V128, s: [ImmLaneIdx32; 16]) -> V128 {
769    let a = I8x16::from_v128(a).0;
770    let b = I8x16::from_v128(b).0;
771    I8x16(array::from_fn(|i| match usize::from(u8::from(s[i])) {
772        i @ 0..16 => a[i],
773        i => b[i - 16],
774    }))
775    .into_v128()
776}
777
778/// Executes a Wasm `i8x16.swizzle` instruction.
779pub fn i8x16_swizzle(a: V128, s: V128) -> V128 {
780    let a = U8x16::from_v128(a).0;
781    let s = U8x16::from_v128(s).0;
782    U8x16(array::from_fn(|i| match usize::from(s[i]) {
783        i @ 0..16 => a[i],
784        _ => 0,
785    }))
786    .into_v128()
787}
788
/// Defines one public lanewise unary function per listed signature.
///
/// Each function applies `$lanewise_expr` to every lane via `V128::lanewise_unary`.
macro_rules! impl_unary_for {
    ( $( fn $name:ident(v128: V128) -> V128 = $lanewise_expr:expr; )* ) => {
        $(
            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
            pub fn $name(v128: V128) -> V128 {
                V128::lanewise_unary(v128, $lanewise_expr)
            }
        )*
    };
}
799
/// Defines one public lanewise unary cast function per listed signature.
///
/// Each function applies `$lanewise_expr` to every lane via `V128::lanewise_unary_cast`.
macro_rules! impl_unary_cast_for {
    ( $( fn $name:ident(v128: V128) -> V128 = $lanewise_expr:expr; )* ) => {
        $(
            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
            pub fn $name(v128: V128) -> V128 {
                V128::lanewise_unary_cast(v128, $lanewise_expr)
            }
        )*
    };
}
810
/// Lanewise operation for the Wasm `q15mulr_sat` SIMD operation.
///
/// Computes `(x * y + 2^14) >> 15` in 32-bit arithmetic and saturates the
/// result to the `i16` value range.
fn i16x8_q15mulr_sat(x: i16, y: i16) -> i16 {
    const MIN: i32 = i16::MIN as i32;
    const MAX: i32 = i16::MAX as i32;
    let x = i32::from(x);
    let y = i32::from(y);
    // Cannot overflow: `|x * y|` is at most `2^30` which leaves room for the rounding term.
    let rounded = (x * y + (1 << 14)) >> 15;
    // The cast is lossless since the value has just been clamped to the `i16` range.
    rounded.clamp(MIN, MAX) as i16
}
820
/// Expands to a closure computing the average of two `$ty` values, rounded up.
///
/// Both operands are widened to `$wide_ty` first so that their sum is
/// computed in the wider type before it is halved and cast back to `$ty`.
macro_rules! avgr {
    ($ty:ty as $wide_ty:ty) => {{
        |a: $ty, b: $ty| {
            let a = <$wide_ty as ::core::convert::From<$ty>>::from(a);
            let b = <$wide_ty as ::core::convert::From<$ty>>::from(b);
            a.wrapping_add(b).div_ceil(2) as $ty
        }
    }};
}
830
/// Wasm SIMD `pmin` (pseudo-min) definition.
///
/// Returns `rhs` only if `rhs < lhs` holds, otherwise `lhs`.
fn pmin<T>(lhs: T, rhs: T) -> T
where
    T: PartialOrd,
{
    if rhs < lhs {
        return rhs;
    }
    lhs
}
839
/// Wasm SIMD `pmax` (pseudo-max) definition.
///
/// Returns `rhs` only if `lhs < rhs` holds, otherwise `lhs`.
fn pmax<T>(lhs: T, rhs: T) -> T
where
    T: PartialOrd,
{
    if lhs < rhs {
        return rhs;
    }
    lhs
}
848
849macro_rules! impl_binary_for {
850    ( $( fn $name:ident(lhs: V128, rhs: V128) -> V128 = $lanewise_expr:expr; )* ) => {
851        $(
852            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
853            pub fn $name(lhs: V128, rhs: V128) -> V128 {
854                V128::lanewise_binary(lhs, rhs, $lanewise_expr)
855            }
856        )*
857    };
858}
859impl_binary_for! {
860    fn i64x2_add(lhs: V128, rhs: V128) -> V128 = i64::wrapping_add;
861    fn i32x4_add(lhs: V128, rhs: V128) -> V128 = i32::wrapping_add;
862    fn i16x8_add(lhs: V128, rhs: V128) -> V128 = i16::wrapping_add;
863    fn i8x16_add(lhs: V128, rhs: V128) -> V128 = i8::wrapping_add;
864
865    fn i64x2_sub(lhs: V128, rhs: V128) -> V128 = i64::wrapping_sub;
866    fn i32x4_sub(lhs: V128, rhs: V128) -> V128 = i32::wrapping_sub;
867    fn i16x8_sub(lhs: V128, rhs: V128) -> V128 = i16::wrapping_sub;
868    fn i8x16_sub(lhs: V128, rhs: V128) -> V128 = i8::wrapping_sub;
869
870    fn i64x2_mul(lhs: V128, rhs: V128) -> V128 = i64::wrapping_mul;
871    fn i32x4_mul(lhs: V128, rhs: V128) -> V128 = i32::wrapping_mul;
872    fn i16x8_mul(lhs: V128, rhs: V128) -> V128 = i16::wrapping_mul;
873    fn i8x16_mul(lhs: V128, rhs: V128) -> V128 = i8::wrapping_mul;
874
875    fn i8x16_add_sat_s(lhs: V128, rhs: V128) -> V128 = i8::saturating_add;
876    fn i8x16_add_sat_u(lhs: V128, rhs: V128) -> V128 = u8::saturating_add;
877    fn i16x8_add_sat_s(lhs: V128, rhs: V128) -> V128 = i16::saturating_add;
878    fn i16x8_add_sat_u(lhs: V128, rhs: V128) -> V128 = u16::saturating_add;
879    fn i8x16_sub_sat_s(lhs: V128, rhs: V128) -> V128 = i8::saturating_sub;
880    fn i8x16_sub_sat_u(lhs: V128, rhs: V128) -> V128 = u8::saturating_sub;
881    fn i16x8_sub_sat_s(lhs: V128, rhs: V128) -> V128 = i16::saturating_sub;
882    fn i16x8_sub_sat_u(lhs: V128, rhs: V128) -> V128 = u16::saturating_sub;
883
884    fn i16x8_q15mulr_sat_s(lhs: V128, rhs: V128) -> V128 = i16x8_q15mulr_sat;
885
886    fn i8x16_min_s(lhs: V128, rhs: V128) -> V128 = i8::min;
887    fn i8x16_min_u(lhs: V128, rhs: V128) -> V128 = u8::min;
888    fn i16x8_min_s(lhs: V128, rhs: V128) -> V128 = i16::min;
889    fn i16x8_min_u(lhs: V128, rhs: V128) -> V128 = u16::min;
890    fn i32x4_min_s(lhs: V128, rhs: V128) -> V128 = i32::min;
891    fn i32x4_min_u(lhs: V128, rhs: V128) -> V128 = u32::min;
892    fn i8x16_max_s(lhs: V128, rhs: V128) -> V128 = i8::max;
893    fn i8x16_max_u(lhs: V128, rhs: V128) -> V128 = u8::max;
894    fn i16x8_max_s(lhs: V128, rhs: V128) -> V128 = i16::max;
895    fn i16x8_max_u(lhs: V128, rhs: V128) -> V128 = u16::max;
896    fn i32x4_max_s(lhs: V128, rhs: V128) -> V128 = i32::max;
897    fn i32x4_max_u(lhs: V128, rhs: V128) -> V128 = u32::max;
898
899    fn i8x16_avgr_u(lhs: V128, rhs: V128) -> V128 = avgr!(u8 as u16);
900    fn i16x8_avgr_u(lhs: V128, rhs: V128) -> V128 = avgr!(u16 as u32);
901
902    fn v128_and(lhs: V128, rhs: V128) -> V128 = <u64 as BitAnd>::bitand;
903    fn v128_or(lhs: V128, rhs: V128) -> V128 = <u64 as BitOr>::bitor;
904    fn v128_xor(lhs: V128, rhs: V128) -> V128 = <u64 as BitXor>::bitxor;
905    fn v128_andnot(lhs: V128, rhs: V128) -> V128 = |a: u64, b: u64| a & !b;
906
907    fn f32x4_min(lhs: V128, rhs: V128) -> V128 = wasm::f32_min;
908    fn f64x2_min(lhs: V128, rhs: V128) -> V128 = wasm::f64_min;
909    fn f32x4_max(lhs: V128, rhs: V128) -> V128 = wasm::f32_max;
910    fn f64x2_max(lhs: V128, rhs: V128) -> V128 = wasm::f64_max;
911    fn f32x4_pmin(lhs: V128, rhs: V128) -> V128 = pmin::<f32>;
912    fn f64x2_pmin(lhs: V128, rhs: V128) -> V128 = pmin::<f64>;
913    fn f32x4_pmax(lhs: V128, rhs: V128) -> V128 = pmax::<f32>;
914    fn f64x2_pmax(lhs: V128, rhs: V128) -> V128 = pmax::<f64>;
915    fn f32x4_add(lhs: V128, rhs: V128) -> V128 = op!(f32, +);
916    fn f64x2_add(lhs: V128, rhs: V128) -> V128 = op!(f64, +);
917    fn f32x4_sub(lhs: V128, rhs: V128) -> V128 = op!(f32, -);
918    fn f64x2_sub(lhs: V128, rhs: V128) -> V128 = op!(f64, -);
919    fn f32x4_div(lhs: V128, rhs: V128) -> V128 = op!(f32, /);
920    fn f64x2_div(lhs: V128, rhs: V128) -> V128 = op!(f64, /);
921    fn f32x4_mul(lhs: V128, rhs: V128) -> V128 = op!(f32, *);
922    fn f64x2_mul(lhs: V128, rhs: V128) -> V128 = op!(f64, *);
923}
924
925impl_unary_for! {
926    fn i64x2_neg(v128: V128) -> V128 = i64::wrapping_neg;
927    fn i32x4_neg(v128: V128) -> V128 = i32::wrapping_neg;
928    fn i16x8_neg(v128: V128) -> V128 = i16::wrapping_neg;
929    fn i8x16_neg(v128: V128) -> V128 = i8::wrapping_neg;
930
931    fn i8x16_abs(v128: V128) -> V128 = i8::wrapping_abs;
932    fn i16x8_abs(v128: V128) -> V128 = i16::wrapping_abs;
933    fn i32x4_abs(v128: V128) -> V128 = i32::wrapping_abs;
934    fn i64x2_abs(v128: V128) -> V128 = i64::wrapping_abs;
935
936    fn v128_not(v128: V128) -> V128 = <i64 as Not>::not;
937
938    fn i8x16_popcnt(v128: V128) -> V128 = |v: u8| v.count_ones() as u8;
939
940    fn f32x4_neg(v128: V128) -> V128 = <f32 as Neg>::neg;
941    fn f64x2_neg(v128: V128) -> V128 = <f64 as Neg>::neg;
942    fn f32x4_abs(v128: V128) -> V128 = f32::abs;
943    fn f64x2_abs(v128: V128) -> V128 = f64::abs;
944    fn f32x4_sqrt(v128: V128) -> V128 = wasm::f32_sqrt;
945    fn f64x2_sqrt(v128: V128) -> V128 = wasm::f64_sqrt;
946    fn f32x4_ceil(v128: V128) -> V128 = wasm::f32_ceil;
947    fn f64x2_ceil(v128: V128) -> V128 = wasm::f64_ceil;
948    fn f32x4_floor(v128: V128) -> V128 = wasm::f32_floor;
949    fn f64x2_floor(v128: V128) -> V128 = wasm::f64_floor;
950    fn f32x4_trunc(v128: V128) -> V128 = wasm::f32_trunc;
951    fn f64x2_trunc(v128: V128) -> V128 = wasm::f64_trunc;
952    fn f32x4_nearest(v128: V128) -> V128 = wasm::f32_nearest;
953    fn f64x2_nearest(v128: V128) -> V128 = wasm::f64_nearest;
954}
955
956impl_unary_cast_for! {
957    fn f32x4_convert_i32x4_s(v128: V128) -> V128 = wasm::f32_convert_i32_s;
958    fn f32x4_convert_i32x4_u(v128: V128) -> V128 = wasm::f32_convert_i32_u;
959    fn i32x4_trunc_sat_f32x4_s(v128: V128) -> V128 = wasm::i32_trunc_sat_f32_s;
960    fn i32x4_trunc_sat_f32x4_u(v128: V128) -> V128 = wasm::i32_trunc_sat_f32_u;
961}
962
963macro_rules! impl_comparison_for {
964    ( $( fn $name:ident(lhs: V128, rhs: V128) -> V128 = $lanewise_expr:expr; )* ) => {
965        $(
966            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
967            pub fn $name(lhs: V128, rhs: V128) -> V128 {
968                V128::lanewise_comparison(lhs, rhs, $lanewise_expr)
969            }
970        )*
971    };
972}
973impl_comparison_for! {
974    fn i8x16_eq(lhs: V128, rhs: V128) -> V128 = op!(i8, ==);
975    fn i16x8_eq(lhs: V128, rhs: V128) -> V128 = op!(i16, ==);
976    fn i32x4_eq(lhs: V128, rhs: V128) -> V128 = op!(i32, ==);
977    fn i64x2_eq(lhs: V128, rhs: V128) -> V128 = op!(i64, ==);
978    fn f32x4_eq(lhs: V128, rhs: V128) -> V128 = op!(f32, ==);
979    fn f64x2_eq(lhs: V128, rhs: V128) -> V128 = op!(f64, ==);
980
981    fn i8x16_ne(lhs: V128, rhs: V128) -> V128 = op!(i8, !=);
982    fn i16x8_ne(lhs: V128, rhs: V128) -> V128 = op!(i16, !=);
983    fn i32x4_ne(lhs: V128, rhs: V128) -> V128 = op!(i32, !=);
984    fn i64x2_ne(lhs: V128, rhs: V128) -> V128 = op!(i64, !=);
985    fn f32x4_ne(lhs: V128, rhs: V128) -> V128 = op!(f32, !=);
986    fn f64x2_ne(lhs: V128, rhs: V128) -> V128 = op!(f64, !=);
987
988    fn i8x16_lt_s(lhs: V128, rhs: V128) -> V128 = op!(i8, <);
989    fn i8x16_lt_u(lhs: V128, rhs: V128) -> V128 = op!(u8, <);
990    fn i16x8_lt_s(lhs: V128, rhs: V128) -> V128 = op!(i16, <);
991    fn i16x8_lt_u(lhs: V128, rhs: V128) -> V128 = op!(u16, <);
992    fn i32x4_lt_s(lhs: V128, rhs: V128) -> V128 = op!(i32, <);
993    fn i32x4_lt_u(lhs: V128, rhs: V128) -> V128 = op!(u32, <);
994    fn i64x2_lt_s(lhs: V128, rhs: V128) -> V128 = op!(i64, <);
995    fn f32x4_lt(lhs: V128, rhs: V128) -> V128 = op!(f32, <);
996    fn f64x2_lt(lhs: V128, rhs: V128) -> V128 = op!(f64, <);
997
998    fn i8x16_le_s(lhs: V128, rhs: V128) -> V128 = op!(i8, <=);
999    fn i8x16_le_u(lhs: V128, rhs: V128) -> V128 = op!(u8, <=);
1000    fn i16x8_le_s(lhs: V128, rhs: V128) -> V128 = op!(i16, <=);
1001    fn i16x8_le_u(lhs: V128, rhs: V128) -> V128 = op!(u16, <=);
1002    fn i32x4_le_s(lhs: V128, rhs: V128) -> V128 = op!(i32, <=);
1003    fn i32x4_le_u(lhs: V128, rhs: V128) -> V128 = op!(u32, <=);
1004    fn i64x2_le_s(lhs: V128, rhs: V128) -> V128 = op!(i64, <=);
1005    fn f32x4_le(lhs: V128, rhs: V128) -> V128 = op!(f32, <=);
1006    fn f64x2_le(lhs: V128, rhs: V128) -> V128 = op!(f64, <=);
1007
1008    fn i8x16_gt_s(lhs: V128, rhs: V128) -> V128 = op!(i8, >);
1009    fn i8x16_gt_u(lhs: V128, rhs: V128) -> V128 = op!(u8, >);
1010    fn i16x8_gt_s(lhs: V128, rhs: V128) -> V128 = op!(i16, >);
1011    fn i16x8_gt_u(lhs: V128, rhs: V128) -> V128 = op!(u16, >);
1012    fn i32x4_gt_s(lhs: V128, rhs: V128) -> V128 = op!(i32, >);
1013    fn i32x4_gt_u(lhs: V128, rhs: V128) -> V128 = op!(u32, >);
1014    fn i64x2_gt_s(lhs: V128, rhs: V128) -> V128 = op!(i64, >);
1015    fn f32x4_gt(lhs: V128, rhs: V128) -> V128 = op!(f32, >);
1016    fn f64x2_gt(lhs: V128, rhs: V128) -> V128 = op!(f64, >);
1017
1018    fn i8x16_ge_s(lhs: V128, rhs: V128) -> V128 = op!(i8, >=);
1019    fn i8x16_ge_u(lhs: V128, rhs: V128) -> V128 = op!(u8, >=);
1020    fn i16x8_ge_s(lhs: V128, rhs: V128) -> V128 = op!(i16, >=);
1021    fn i16x8_ge_u(lhs: V128, rhs: V128) -> V128 = op!(u16, >=);
1022    fn i32x4_ge_s(lhs: V128, rhs: V128) -> V128 = op!(i32, >=);
1023    fn i32x4_ge_u(lhs: V128, rhs: V128) -> V128 = op!(u32, >=);
1024    fn i64x2_ge_s(lhs: V128, rhs: V128) -> V128 = op!(i64, >=);
1025    fn f32x4_ge(lhs: V128, rhs: V128) -> V128 = op!(f32, >=);
1026    fn f64x2_ge(lhs: V128, rhs: V128) -> V128 = op!(f64, >=);
1027}
1028
1029macro_rules! impl_widen_low_unary {
1030    (
1031        $( fn $name:ident(v128: V128) -> V128 = $convert:expr; )*
1032    ) => {
1033        $(
1034            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1035            pub fn $name(v128: V128) -> V128 {
1036                v128.low_unary($convert)
1037            }
1038        )*
1039    };
1040}
1041impl_widen_low_unary! {
1042    fn i16x8_extend_low_i8x16_s(v128: V128) -> V128 = <i8 as Into<i16>>::into;
1043    fn i16x8_extend_low_i8x16_u(v128: V128) -> V128 = <u8 as Into<u16>>::into;
1044    fn i32x4_extend_low_i16x8_s(v128: V128) -> V128 = <i16 as Into<i32>>::into;
1045    fn i32x4_extend_low_i16x8_u(v128: V128) -> V128 = <u16 as Into<u32>>::into;
1046    fn i64x2_extend_low_i32x4_s(v128: V128) -> V128 = <i32 as Into<i64>>::into;
1047    fn i64x2_extend_low_i32x4_u(v128: V128) -> V128 = <u32 as Into<u64>>::into;
1048
1049    fn f64x2_convert_low_i32x4_s(v128: V128) -> V128 = wasm::f64_convert_i32_s;
1050    fn f64x2_convert_low_i32x4_u(v128: V128) -> V128 = wasm::f64_convert_i32_u;
1051    fn f64x2_promote_low_f32x4(v128: V128) -> V128 = wasm::f64_promote_f32;
1052}
1053
1054macro_rules! impl_widen_high_unary {
1055    (
1056        $( fn $name:ident(v128: V128) -> V128 = $convert:expr; )*
1057    ) => {
1058        $(
1059            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1060            pub fn $name(v128: V128) -> V128 {
1061                v128.high_unary($convert)
1062            }
1063        )*
1064    };
1065}
1066impl_widen_high_unary! {
1067    fn i16x8_extend_high_i8x16_s(v128: V128) -> V128 = <i8 as Into<i16>>::into;
1068    fn i16x8_extend_high_i8x16_u(v128: V128) -> V128 = <u8 as Into<u16>>::into;
1069    fn i32x4_extend_high_i16x8_s(v128: V128) -> V128 = <i16 as Into<i32>>::into;
1070    fn i32x4_extend_high_i16x8_u(v128: V128) -> V128 = <u16 as Into<u32>>::into;
1071    fn i64x2_extend_high_i32x4_s(v128: V128) -> V128 = <i32 as Into<i64>>::into;
1072    fn i64x2_extend_high_i32x4_u(v128: V128) -> V128 = <u32 as Into<u64>>::into;
1073}
1074
/// Expands to a closure that widens both `$narrow` operands to `$wide` and
/// multiplies them with wrapping semantics.
macro_rules! extmul {
    ($narrow:ty => $wide:ty) => {{
        |lhs: $narrow, rhs: $narrow| -> $wide {
            let lhs = <$wide as From<$narrow>>::from(lhs);
            <$wide>::wrapping_mul(lhs, <$wide as From<$narrow>>::from(rhs))
        }
    }};
}
1084
1085macro_rules! impl_ext_binary_low {
1086    (
1087        $( fn $name:ident(lhs: V128, rhs: V128) -> V128 = $f:expr; )*
1088    ) => {
1089        $(
1090            #[doc = concat!("Executes a Wasm `", stringify!($extmul_low), "` instruction.")]
1091            pub fn $name(lhs: V128, rhs: V128) -> V128 {
1092                V128::from_low_binary(lhs, rhs, $f)
1093            }
1094        )*
1095    };
1096}
1097impl_ext_binary_low! {
1098    fn i16x8_extmul_low_i8x16_s(lhs: V128, rhs: V128) -> V128 = extmul!( i8 => i16);
1099    fn i16x8_extmul_low_i8x16_u(lhs: V128, rhs: V128) -> V128 = extmul!( u8 => u16);
1100    fn i32x4_extmul_low_i16x8_s(lhs: V128, rhs: V128) -> V128 = extmul!(i16 => i32);
1101    fn i32x4_extmul_low_i16x8_u(lhs: V128, rhs: V128) -> V128 = extmul!(u16 => u32);
1102    fn i64x2_extmul_low_i32x4_s(lhs: V128, rhs: V128) -> V128 = extmul!(i32 => i64);
1103    fn i64x2_extmul_low_i32x4_u(lhs: V128, rhs: V128) -> V128 = extmul!(u32 => u64);
1104}
1105
1106macro_rules! impl_ext_binary_high {
1107    (
1108        $( fn $name:ident(lhs: V128, rhs: V128) -> V128 = $f:expr; )*
1109    ) => {
1110        $(
1111            #[doc = concat!("Executes a Wasm `", stringify!($extmul_low), "` instruction.")]
1112            pub fn $name(lhs: V128, rhs: V128) -> V128 {
1113                V128::from_high_binary(lhs, rhs, $f)
1114            }
1115        )*
1116    };
1117}
1118impl_ext_binary_high! {
1119    fn i16x8_extmul_high_i8x16_s(lhs: V128, rhs: V128) -> V128 = extmul!( i8 => i16);
1120    fn i16x8_extmul_high_i8x16_u(lhs: V128, rhs: V128) -> V128 = extmul!( u8 => u16);
1121    fn i32x4_extmul_high_i16x8_s(lhs: V128, rhs: V128) -> V128 = extmul!(i16 => i32);
1122    fn i32x4_extmul_high_i16x8_u(lhs: V128, rhs: V128) -> V128 = extmul!(u16 => u32);
1123    fn i64x2_extmul_high_i32x4_s(lhs: V128, rhs: V128) -> V128 = extmul!(i32 => i64);
1124    fn i64x2_extmul_high_i32x4_u(lhs: V128, rhs: V128) -> V128 = extmul!(u32 => u64);
1125}
1126
1127macro_rules! impl_extadd_pairwise {
1128    (
1129        $( fn $name:ident(v128: V128) -> V128 = $narrow:ty => $wide:ty; )*
1130    ) => {
1131        $(
1132            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1133            pub fn $name(v128: V128) -> V128 {
1134                fn extadd_pairwise(a: $narrow, b: $narrow) -> $wide {
1135                    let a = <$wide>::from(a);
1136                    let b = <$wide>::from(b);
1137                    a.wrapping_add(b)
1138                }
1139                v128.pairwise_unary(extadd_pairwise)
1140            }
1141        )*
1142    };
1143}
1144impl_extadd_pairwise! {
1145    fn i16x8_extadd_pairwise_i8x16_s(v128: V128) -> V128 = i8 => i16;
1146    fn i16x8_extadd_pairwise_i8x16_u(v128: V128) -> V128 = u8 => u16;
1147    fn i32x4_extadd_pairwise_i16x8_s(v128: V128) -> V128 = i16 => i32;
1148    fn i32x4_extadd_pairwise_i16x8_u(v128: V128) -> V128 = u16 => u32;
1149}
1150
1151macro_rules! impl_shift_ops {
1152    (
1153        $( fn $name:ident(v128: V128, rhs: u32) -> V128 = $lanewise_expr:expr; )*
1154    ) => {
1155        $(
1156            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1157            pub fn $name(v128: V128, rhs: u32) -> V128 {
1158                v128.lanewise_unary(|v| $lanewise_expr(v, rhs))
1159            }
1160        )*
1161    };
1162}
1163impl_shift_ops! {
1164    fn i8x16_shl(v128: V128, rhs: u32) -> V128 = i8::wrapping_shl;
1165    fn i16x8_shl(v128: V128, rhs: u32) -> V128 = i16::wrapping_shl;
1166    fn i32x4_shl(v128: V128, rhs: u32) -> V128 = i32::wrapping_shl;
1167    fn i64x2_shl(v128: V128, rhs: u32) -> V128 = i64::wrapping_shl;
1168    fn i8x16_shr_s(v128: V128, rhs: u32) -> V128 = i8::wrapping_shr;
1169    fn i8x16_shr_u(v128: V128, rhs: u32) -> V128 = u8::wrapping_shr;
1170    fn i16x8_shr_s(v128: V128, rhs: u32) -> V128 = i16::wrapping_shr;
1171    fn i16x8_shr_u(v128: V128, rhs: u32) -> V128 = u16::wrapping_shr;
1172    fn i32x4_shr_s(v128: V128, rhs: u32) -> V128 = i32::wrapping_shr;
1173    fn i32x4_shr_u(v128: V128, rhs: u32) -> V128 = u32::wrapping_shr;
1174    fn i64x2_shr_s(v128: V128, rhs: u32) -> V128 = i64::wrapping_shr;
1175    fn i64x2_shr_u(v128: V128, rhs: u32) -> V128 = u64::wrapping_shr;
1176}
1177
1178macro_rules! impl_narrowing_low_high_ops {
1179    (
1180        $( fn $name:ident(low: V128, high: V128) -> V128 = $f:expr; )*
1181    ) => {
1182        $(
1183            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1184            pub fn $name(low: V128, high: V128) -> V128 {
1185                V128::from_low_high(low, high, $f)
1186            }
1187        )*
1188    };
1189}
1190impl_narrowing_low_high_ops! {
1191    fn i8x16_narrow_i16x8_s(low: V128, high: V128) -> V128 = narrow_i16_to_i8;
1192    fn i8x16_narrow_i16x8_u(low: V128, high: V128) -> V128 = narrow_u16_to_u8;
1193    fn i16x8_narrow_i32x4_s(low: V128, high: V128) -> V128 = narrow_i32_to_i16;
1194    fn i16x8_narrow_i32x4_u(low: V128, high: V128) -> V128 = narrow_u32_to_u16;
1195}
1196
/// Defines saturating narrowing helpers.
///
/// Each generated function optionally reinterprets its input via an `as` cast
/// (the `as $signed` part), clamps it to the value range of the destination
/// type and then casts it to that destination type.
macro_rules! def_narrow_from_to {
    (
        $( fn $func:ident(value: $src:ty $(as $signed:ty)? ) -> $dst:ty );* $(;)?
    ) => {
        $(
            #[doc = concat!("Narrows `value` from type `", stringify!($src), "` to type `", stringify!($dst), "`.")]
            fn $func(value: $src) -> $dst {
                $( let value = value as $signed; )?
                let (lowest, highest) = (<$dst>::MIN.into(), <$dst>::MAX.into());
                value.clamp(lowest, highest) as $dst
            }
        )*
    };
}
def_narrow_from_to! {
    fn narrow_i16_to_i8(value: i16) -> i8;
    fn narrow_u16_to_u8(value: u16 as i16) -> u8;
    fn narrow_i32_to_i16(value: i32) -> i16;
    fn narrow_u32_to_u16(value: u32 as i32) -> u16;
}
1216
1217macro_rules! impl_narrowing_low_high_ops {
1218    (
1219        $( fn $name:ident(v128: V128) -> V128 = (high: $high:expr, f: $f:expr); )*
1220    ) => {
1221        $(
1222            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1223            pub fn $name(low: V128) -> V128 {
1224                V128::low_or(low, $high, $f)
1225            }
1226        )*
1227    };
1228}
1229impl_narrowing_low_high_ops! {
1230    fn i32x4_trunc_sat_f64x2_s_zero(v128: V128) -> V128 = (high: || 0, f: wasm::i32_trunc_sat_f64_s);
1231    fn i32x4_trunc_sat_f64x2_u_zero(v128: V128) -> V128 = (high: || 0, f: wasm::i32_trunc_sat_f64_u);
1232    fn f32x4_demote_f64x2_zero(v128: V128) -> V128 = (high: || 0.0, f: wasm::f32_demote_f64);
1233}
1234
/// Expands to a reducer closure that keeps the accumulator `true` only while
/// every visited `$ty` lane is non-zero.
macro_rules! all_true {
    ($ty:ty) => {{
        |lane: $ty, acc: bool| -> bool { (lane != 0) & acc }
    }};
}
1240macro_rules! impl_all_true_ops {
1241    (
1242        $( fn $name:ident(v128: V128) -> bool = $f:expr; )*
1243    ) => {
1244        $(
1245            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1246            pub fn $name(v128: V128) -> bool {
1247                v128.lanewise_reduce(true, $f)
1248            }
1249        )*
1250    };
1251}
1252impl_all_true_ops! {
1253    fn i8x16_all_true(v128: V128) -> bool = all_true!(i8);
1254    fn i16x8_all_true(v128: V128) -> bool = all_true!(i16);
1255    fn i32x4_all_true(v128: V128) -> bool = all_true!(i32);
1256    fn i64x2_all_true(v128: V128) -> bool = all_true!(i64);
1257}
1258
/// Expands to a reducer closure that sets bit `n` of the accumulator when the
/// `$ty` lane value `v` is negative.
macro_rules! bitmask {
    ($ty:ty) => {{
        |n: u8, v: $ty, acc| {
            let bit = i32::from(v < 0).wrapping_shl(u32::from(n));
            acc | bit
        }
    }};
}
1264macro_rules! impl_bitmask_ops {
1265    (
1266        $( fn $name:ident(v128: V128) -> u32 = $f:expr; )*
1267    ) => {
1268        $(
1269            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1270            pub fn $name(v128: V128) -> u32 {
1271                v128.lanewise_reduce_enumerate(0_i32, $f) as _
1272            }
1273        )*
1274    };
1275}
1276impl_bitmask_ops! {
1277    fn i8x16_bitmask(v128: V128) -> u32 = bitmask!(i8);
1278    fn i16x8_bitmask(v128: V128) -> u32 = bitmask!(i16);
1279    fn i32x4_bitmask(v128: V128) -> u32 = bitmask!(i32);
1280    fn i64x2_bitmask(v128: V128) -> u32 = bitmask!(i64);
1281}
1282
1283/// Executes a Wasm `v128.any_true` instruction.
1284pub fn v128_any_true(v128: V128) -> bool {
1285    v128.as_u128() != 0
1286}
1287
1288/// Executes a Wasm `i32x4.dot_i16x8_s` instruction.
1289pub fn i32x4_dot_i16x8_s(lhs: V128, rhs: V128) -> V128 {
1290    fn dot(a: [i16; 2], b: [i16; 2]) -> i32 {
1291        let a = a.map(i32::from);
1292        let b = b.map(i32::from);
1293        let dot0 = a[0].wrapping_mul(b[0]);
1294        let dot1 = a[1].wrapping_mul(b[1]);
1295        dot0.wrapping_add(dot1)
1296    }
1297    V128::pairwise_binary(lhs, rhs, dot)
1298}
1299
1300/// Executes a Wasm `i16x8.relaxed_dot_i8x16_i7x16_s` instruction.
1301///
1302/// # Note
1303///
1304/// This is part of the `relaxed-simd` Wasm proposal.
1305pub fn i16x8_relaxed_dot_i8x16_i7x16_s(lhs: V128, rhs: V128) -> V128 {
1306    fn dot(a: [i8; 2], b: [i8; 2]) -> i16 {
1307        let a = a.map(i16::from);
1308        let b = b.map(i16::from);
1309        let dot0 = a[0].wrapping_mul(b[0]);
1310        let dot1 = a[1].wrapping_mul(b[1]);
1311        dot0.wrapping_add(dot1)
1312    }
1313    V128::pairwise_binary(lhs, rhs, dot)
1314}
1315
1316/// Executes a Wasm `i32x4.relaxed_dot_i8x16_i7x16_add_s` instruction.
1317///
1318/// # Note
1319///
1320/// This is part of the `relaxed-simd` Wasm proposal.
1321pub fn i32x4_relaxed_dot_i8x16_i7x16_add_s(lhs: V128, rhs: V128, c: V128) -> V128 {
1322    let dot = i16x8_relaxed_dot_i8x16_i7x16_s(lhs, rhs);
1323    let ext = i32x4_extadd_pairwise_i16x8_s(dot);
1324    i32x4_add(ext, c)
1325}
1326
1327/// Executes a Wasm `v128.bitselect` instruction.
1328pub fn v128_bitselect(v1: V128, v2: V128, c: V128) -> V128 {
1329    simd::v128_or(simd::v128_and(v1, c), simd::v128_andnot(v2, c))
1330}
1331
1332/// Computes the negative `mul_add`: `-(a * b) + c`
1333fn neg_mul_add<T>(a: T, b: T, c: T) -> T
1334where
1335    T: Float + Neg<Output = T>,
1336{
1337    <T as Float>::mul_add(a.neg(), b, c)
1338}
1339
1340macro_rules! impl_ternary_for {
1341    ( $( fn $name:ident(a: V128, b: V128, c: V128) -> V128 = $lanewise_expr:expr; )* ) => {
1342        $(
1343            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1344            #[doc = ""]
1345            #[doc = "# Note"]
1346            #[doc = ""]
1347            #[doc = "This is part of the `relaxed-simd` Wasm proposal."]
1348            pub fn $name(a: V128, b: V128, c: V128) -> V128 {
1349                V128::lanewise_ternary(a, b, c, $lanewise_expr)
1350            }
1351        )*
1352    };
1353}
1354impl_ternary_for! {
1355    fn f32x4_relaxed_madd(a: V128, b: V128, c: V128) -> V128 = <f32 as Float>::mul_add;
1356    fn f32x4_relaxed_nmadd(a: V128, b: V128, c: V128) -> V128 = neg_mul_add::<f32>;
1357    fn f64x2_relaxed_madd(a: V128, b: V128, c: V128) -> V128 = <f64 as Float>::mul_add;
1358    fn f64x2_relaxed_nmadd(a: V128, b: V128, c: V128) -> V128 = neg_mul_add::<f64>;
1359}
1360
1361/// Executes a Wasm `v128.store` instruction.
1362///
1363/// # Errors
1364///
1365/// - If `ptr + offset` overflows.
1366/// - If `ptr + offset` stores out of bounds from `memory`.
1367pub fn v128_store(memory: &mut [u8], ptr: u64, offset: u64, value: V128) -> Result<(), TrapCode> {
1368    memory::store(memory, ptr, offset, value.as_u128())
1369}
1370
1371/// Executes a Wasm `v128.store` instruction.
1372///
1373/// # Errors
1374///
1375/// If `address` stores out of bounds from `memory`.
1376pub fn v128_store_at(memory: &mut [u8], address: usize, value: V128) -> Result<(), TrapCode> {
1377    memory::store_at(memory, address, value.as_u128())
1378}
1379
1380macro_rules! impl_v128_storeN_lane {
1381    (
1382        $(
1383            fn $name:ident(
1384                memory: &mut [u8],
1385                ptr: u64,
1386                offset: u64,
1387                value: V128,
1388                imm: $lane_idx:ty $(,)?
1389            ) -> Result<(), TrapCode>
1390            = $store_ty:ty;
1391        )*
1392    ) => {
1393        $(
1394            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1395            ///
1396            /// # Errors
1397            ///
1398            /// - If `ptr + offset` overflows.
1399            /// - If `ptr + offset` stores out of bounds from `memory`.
1400            pub fn $name(memory: &mut [u8], ptr: u64, offset: u64, value: V128, imm: $lane_idx) -> Result<(), TrapCode> {
1401                memory::store(memory, ptr, offset, value.extract_lane::<$store_ty>(imm))
1402            }
1403        )*
1404    };
1405}
1406impl_v128_storeN_lane! {
1407    fn v128_store8_lane(
1408        memory: &mut [u8],
1409        ptr: u64,
1410        offset: u64,
1411        value: V128,
1412        imm: ImmLaneIdx16,
1413    ) -> Result<(), TrapCode> = u8;
1414
1415    fn v128_store16_lane(
1416        memory: &mut [u8],
1417        ptr: u64,
1418        offset: u64,
1419        value: V128,
1420        imm: ImmLaneIdx8,
1421    ) -> Result<(), TrapCode> = u16;
1422
1423    fn v128_store32_lane(
1424        memory: &mut [u8],
1425        ptr: u64,
1426        offset: u64,
1427        value: V128,
1428        imm: ImmLaneIdx4,
1429    ) -> Result<(), TrapCode> = u32;
1430
1431    fn v128_store64_lane(
1432        memory: &mut [u8],
1433        ptr: u64,
1434        offset: u64,
1435        value: V128,
1436        imm: ImmLaneIdx2,
1437    ) -> Result<(), TrapCode> = u64;
1438}
1439
1440macro_rules! impl_v128_storeN_lane_at {
1441    (
1442        $( fn $name:ident(memory: &mut [u8], address: usize, value: V128, imm: $lane_idx:ty) -> Result<(), TrapCode> = $store_ty:ty; )*
1443    ) => {
1444        $(
1445            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1446            ///
1447            /// # Errors
1448            ///
1449            /// If `address` stores out of bounds from `memory`.
1450            pub fn $name(memory: &mut [u8], address: usize, value: V128, imm: $lane_idx) -> Result<(), TrapCode> {
1451                memory::store_at(memory, address, value.extract_lane::<$store_ty>(imm))
1452            }
1453        )*
1454    };
1455}
1456impl_v128_storeN_lane_at! {
1457    fn v128_store8_lane_at(
1458        memory: &mut [u8], address: usize, value: V128, imm: ImmLaneIdx16
1459    ) -> Result<(), TrapCode> = u8;
1460    fn v128_store16_lane_at(
1461        memory: &mut [u8], address: usize, value: V128, imm: ImmLaneIdx8
1462    ) -> Result<(), TrapCode> = u16;
1463    fn v128_store32_lane_at(
1464        memory: &mut [u8], address: usize, value: V128, imm: ImmLaneIdx4
1465    ) -> Result<(), TrapCode> = u32;
1466    fn v128_store64_lane_at(
1467        memory: &mut [u8], address: usize, value: V128, imm: ImmLaneIdx2
1468    ) -> Result<(), TrapCode> = u64;
1469}
1470
1471/// Executes a Wasmi `v128.load` instruction.
1472///
1473/// # Errors
1474///
1475/// - If `ptr + offset` overflows.
1476/// - If `ptr + offset` loads out of bounds from `memory`.
1477pub fn v128_load(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> {
1478    memory::load::<u128>(memory, ptr, offset).map(V128::from)
1479}
1480
1481/// Executes a Wasmi `v128.load` instruction.
1482///
1483/// # Errors
1484///
1485/// If `address` loads out of bounds from `memory`.
1486pub fn v128_load_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> {
1487    memory::load_at::<u128>(memory, address).map(V128::from)
1488}
1489
1490macro_rules! impl_v128_loadN_zero_for {
1491    (
1492        $( fn $name:ident(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = $ty:ty; )*
1493    ) => {
1494        $(
1495            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1496            ///
1497            /// # Errors
1498            ///
1499            /// - If `ptr + offset` overflows.
1500            /// - If `ptr + offset` loads out of bounds from `memory`.
1501            pub fn $name(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> {
1502                let bits = memory::load::<$ty>(memory, ptr, offset)?;
1503                Ok(V128::splat::<$ty>(0).replace_lane::<$ty>(<$ty as IntoLaneIdx>::LaneIdx::zero(), bits))
1504            }
1505        )*
1506    };
1507}
1508impl_v128_loadN_zero_for! {
1509    fn v128_load32_zero(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u32;
1510    fn v128_load64_zero(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u64;
1511}
1512
1513macro_rules! impl_v128_loadN_zero_at_for {
1514    (
1515        $( fn $name:ident(memory: &[u8], address: usize) -> Result<V128, TrapCode> = $ty:ty; )*
1516    ) => {
1517        $(
1518            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1519            ///
1520            /// # Errors
1521            ///
1522            /// If `address` loads out of bounds from `memory`.
1523            pub fn $name(memory: &[u8], address: usize) -> Result<V128, TrapCode> {
1524                let bits = memory::load_at::<$ty>(memory, address)?;
1525                Ok(V128::splat::<$ty>(0).replace_lane::<$ty>(<$ty as IntoLaneIdx>::LaneIdx::zero(), bits))
1526            }
1527        )*
1528    };
1529}
1530impl_v128_loadN_zero_at_for! {
1531    fn v128_load32_zero_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u32;
1532    fn v128_load64_zero_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u64;
1533}
1534
1535macro_rules! impl_v128_loadN_splat_for {
1536    (
1537        $( fn $name:ident(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = $ty:ty; )*
1538    ) => {
1539        $(
1540            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1541            ///
1542            /// # Errors
1543            ///
1544            /// - If `ptr + offset` overflows.
1545            /// - If `ptr + offset` loads out of bounds from `memory`.
1546            pub fn $name(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> {
1547                memory::load::<$ty>(memory, ptr, offset).map(V128::splat)
1548            }
1549        )*
1550    };
1551}
1552impl_v128_loadN_splat_for! {
1553    fn v128_load8_splat(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u8;
1554    fn v128_load16_splat(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u16;
1555    fn v128_load32_splat(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u32;
1556    fn v128_load64_splat(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = u64;
1557}
1558
1559macro_rules! impl_v128_loadN_splat_at_for {
1560    (
1561        $( fn $name:ident(memory: &[u8], address: usize) -> Result<V128, TrapCode> = $ty:ty; )*
1562    ) => {
1563        $(
1564            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1565            ///
1566            /// # Errors
1567            ///
1568            /// If `address` loads out of bounds from `memory`.
1569            pub fn $name(memory: &[u8], address: usize) -> Result<V128, TrapCode> {
1570                memory::load_at::<$ty>(memory, address).map(V128::splat)
1571            }
1572        )*
1573    };
1574}
1575impl_v128_loadN_splat_at_for! {
1576    fn v128_load8_splat_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u8;
1577    fn v128_load16_splat_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u16;
1578    fn v128_load32_splat_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u32;
1579    fn v128_load64_splat_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = u64;
1580}
1581
1582macro_rules! impl_v128_loadN_lane_for {
1583    (
1584        $(
1585            fn $name:ident(
1586                memory: &[u8],
1587                ptr: u64,
1588                offset: u64,
1589                x: V128,
1590                lane: $lane_idx:ty $(,)?
1591            ) -> Result<V128, TrapCode> = $ty:ty; )*
1592    ) => {
1593        $(
1594            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1595            ///
1596            /// # Errors
1597            ///
1598            /// - If `ptr + offset` overflows.
1599            /// - If `ptr + offset` loads out of bounds from `memory`.
1600            pub fn $name(memory: &[u8], ptr: u64, offset: u64, x: V128, lane: $lane_idx) -> Result<V128, TrapCode> {
1601                memory::load::<$ty>(memory, ptr, offset).map(|value| x.replace_lane(lane, value))
1602            }
1603        )*
1604    };
1605}
1606impl_v128_loadN_lane_for! {
1607    fn v128_load8_lane(memory: &[u8], ptr: u64, offset: u64, x: V128, lane: ImmLaneIdx16) -> Result<V128, TrapCode> = u8;
1608    fn v128_load16_lane(memory: &[u8], ptr: u64, offset: u64, x: V128, lane: ImmLaneIdx8) -> Result<V128, TrapCode> = u16;
1609    fn v128_load32_lane(memory: &[u8], ptr: u64, offset: u64, x: V128, lane: ImmLaneIdx4) -> Result<V128, TrapCode> = u32;
1610    fn v128_load64_lane(memory: &[u8], ptr: u64, offset: u64, x: V128, lane: ImmLaneIdx2) -> Result<V128, TrapCode> = u64;
1611}
1612
1613macro_rules! impl_v128_loadN_lane_at_for {
1614    (
1615        $( fn $name:ident(memory: &[u8], address: usize, x: V128, lane: $lane_idx:ty) -> Result<V128, TrapCode> = $ty:ty; )*
1616    ) => {
1617        $(
1618            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1619            ///
1620            /// # Errors
1621            ///
1622            /// If `address` loads out of bounds from `memory`.
1623            pub fn $name(memory: &[u8], address: usize, x: V128, lane: $lane_idx) -> Result<V128, TrapCode> {
1624                memory::load_at::<$ty>(memory, address).map(|value| x.replace_lane(lane, value))
1625            }
1626        )*
1627    };
1628}
1629impl_v128_loadN_lane_at_for! {
1630    fn v128_load8_lane_at(memory: &[u8], address: usize, x: V128, lane: ImmLaneIdx16) -> Result<V128, TrapCode> = u8;
1631    fn v128_load16_lane_at(memory: &[u8], address: usize, x: V128, lane: ImmLaneIdx8) -> Result<V128, TrapCode> = u16;
1632    fn v128_load32_lane_at(memory: &[u8], address: usize, x: V128, lane: ImmLaneIdx4) -> Result<V128, TrapCode> = u32;
1633    fn v128_load64_lane_at(memory: &[u8], address: usize, x: V128, lane: ImmLaneIdx2) -> Result<V128, TrapCode> = u64;
1634}
1635
/// Allows `Self` to be safely and efficiently split into an array of `T`.
///
/// `T` is the item type and fits multiple times into `Self`; the resulting
/// array type is [`SplitInto::Output`].
/// An example of this is that `u64` can be split into `[u32; 2]`.
///
/// Items are produced in little-endian order independent of the host:
/// item `0` always holds the least significant bits of `Self`.
///
/// This is a helper trait to implement [`V128::load_nxm`] generically.
trait SplitInto<T> {
    /// The array of `T` items that `Self` is split into.
    type Output;
    /// Splits `self` into its items, least significant item first.
    fn split_into(self) -> Self::Output;
}

macro_rules! impl_split_into_for {
    ( $( impl SplitInto<$ty:ty> for u64; )* ) => {
        $(
            impl SplitInto<$ty> for u64 {
                type Output = [$ty; core::mem::size_of::<u64>() / core::mem::size_of::<$ty>()];

                fn split_into(self) -> Self::Output {
                    // Explicit little-endian conversions keep the item order
                    // identical on every host. Native-endian conversions would
                    // reverse the items on big-endian targets.
                    let bytes = self.to_le_bytes();
                    array::from_fn(|i| {
                        let start = core::mem::size_of::<$ty>() * i;
                        <$ty>::from_le_bytes(array::from_fn(|j| bytes[start + j]))
                    })
                }
            }
        )*
    };
}
impl_split_into_for! {
    impl SplitInto<u8> for u64;
    impl SplitInto<i8> for u64;
    impl SplitInto<u16> for u64;
    impl SplitInto<i16> for u64;
    impl SplitInto<u32> for u64;
    impl SplitInto<i32> for u64;
}
1673
1674/// Allows to extend all items in an array from `T` to `Ext`.
1675///
1676/// This is a helper trait to implement [`V128::load_nxm`] generically.
1677trait ExtendArray<T> {
1678    type Output;
1679    fn extend_array(self) -> Self::Output;
1680}
1681
1682impl<const N: usize, Ext, T> ExtendArray<Ext> for [T; N]
1683where
1684    T: ExtendInto<Ext>,
1685{
1686    type Output = [Ext; N];
1687    fn extend_array(self) -> Self::Output {
1688        self.map(<T as ExtendInto<Ext>>::extend_into)
1689    }
1690}
1691
1692impl V128 {
1693    /// Interprets `bits` as array of `Narrow` and distribute the (sign) extended items as [`V128`].
1694    fn load_nxm<Narrow, Wide>(bits: u64) -> V128
1695    where
1696        u64: SplitInto<Narrow, Output: ExtendArray<Wide, Output: Into<<Wide as IntoLanes>::Lanes>>>,
1697        Wide: IntoLanes,
1698    {
1699        bits.split_into().extend_array().into().into_v128()
1700    }
1701}
1702
1703macro_rules! impl_v128_load_mxn {
1704    (
1705        $( fn $name:ident(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = ($n:ty => $w:ty); )*
1706    ) => {
1707        $(
1708            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1709            ///
1710            /// # Errors
1711            ///
1712            /// - If `ptr + offset` overflows.
1713            /// - If `ptr + offset` loads out of bounds from `memory`.
1714            pub fn $name(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> {
1715                memory::load::<u64>(memory, ptr, offset).map(V128::load_nxm::<$n, $w>)
1716            }
1717        )*
1718    };
1719}
1720impl_v128_load_mxn! {
1721    fn v128_load8x8_s(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (i8 => i16);
1722    fn v128_load8x8_u(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (u8 => u16);
1723    fn v128_load16x4_s(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (i16 => i32);
1724    fn v128_load16x4_u(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (u16 => u32);
1725    fn v128_load32x2_s(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (i32 => i64);
1726    fn v128_load32x2_u(memory: &[u8], ptr: u64, offset: u64) -> Result<V128, TrapCode> = (u32 => u64);
1727}
1728
1729macro_rules! impl_v128_load_mxn_at {
1730    (
1731        $( fn $name:ident(memory: &[u8], address: usize) -> Result<V128, TrapCode> = ($n:ty => $w:ty); )*
1732    ) => {
1733        $(
1734            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1735            ///
1736            /// # Errors
1737            ///
1738            /// If `address` loads out of bounds from `memory`.
1739            pub fn $name(memory: &[u8], address: usize) -> Result<V128, TrapCode> {
1740                memory::load_at::<u64>(memory, address).map(V128::load_nxm::<$n, $w>)
1741            }
1742        )*
1743    };
1744}
1745impl_v128_load_mxn_at! {
1746    fn v128_load8x8_s_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (i8 => i16);
1747    fn v128_load8x8_u_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (u8 => u16);
1748    fn v128_load16x4_s_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (i16 => i32);
1749    fn v128_load16x4_u_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (u16 => u32);
1750    fn v128_load32x2_s_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (i32 => i64);
1751    fn v128_load32x2_u_at(memory: &[u8], address: usize) -> Result<V128, TrapCode> = (u32 => u64);
1752}
1753
1754macro_rules! impl_forwarding_relaxed_ops {
1755    (
1756        $(
1757            fn $name:ident(
1758                $( $param_name:ident: $param_ty:ty ),* $(,)?
1759            ) -> $ret_ty:ty
1760            = $forward_fn:expr
1761        );* $(;)?
1762    ) => {
1763        $(
1764            #[doc = concat!("Executes a Wasm `", stringify!($name), "` instruction.")]
1765            #[doc = ""]
1766            #[doc = "# Note"]
1767            #[doc = ""]
1768            #[doc = "This is part of the `relaxed-simd` Wasm proposal."]
1769            pub fn $name( $( $param_name: $param_ty ),* ) -> $ret_ty {
1770                $forward_fn( $( $param_name ),* )
1771            }
1772        )*
1773    };
1774}
1775impl_forwarding_relaxed_ops! {
1776    fn i8x16_relaxed_swizzle(a: V128, s: V128) -> V128 = i8x16_swizzle;
1777
1778    fn i8x16_relaxed_laneselect(a: V128, b: V128, c: V128) -> V128 = v128_bitselect;
1779    fn i16x8_relaxed_laneselect(a: V128, b: V128, c: V128) -> V128 = v128_bitselect;
1780    fn i32x4_relaxed_laneselect(a: V128, b: V128, c: V128) -> V128 = v128_bitselect;
1781    fn i64x2_relaxed_laneselect(a: V128, b: V128, c: V128) -> V128 = v128_bitselect;
1782
1783    fn f32x4_relaxed_min(lhs: V128, rhs: V128) -> V128 = f32x4_min;
1784    fn f32x4_relaxed_max(lhs: V128, rhs: V128) -> V128 = f32x4_max;
1785    fn f64x2_relaxed_min(lhs: V128, rhs: V128) -> V128 = f64x2_min;
1786    fn f64x2_relaxed_max(lhs: V128, rhs: V128) -> V128 = f64x2_max;
1787
1788    fn i16x8_relaxed_q15mulr_s(a: V128, b: V128) -> V128 = i16x8_q15mulr_sat_s;
1789
1790    fn i32x4_relaxed_trunc_f32x4_s(input: V128) -> V128 = i32x4_trunc_sat_f32x4_s;
1791    fn i32x4_relaxed_trunc_f32x4_u(input: V128) -> V128 = i32x4_trunc_sat_f32x4_u;
1792    fn i32x4_relaxed_trunc_f64x2_s_zero(input: V128) -> V128 = i32x4_trunc_sat_f64x2_s_zero;
1793    fn i32x4_relaxed_trunc_f64x2_u_zero(input: V128) -> V128 = i32x4_trunc_sat_f64x2_u_zero;
1794}
1795
/// Checks `i32x4_dot_i16x8_s` for operands splatted with `16383` and `16384`.
#[test]
fn i32x4_dot_i16x8_s_works() {
    let lhs = simd::i16x8_splat(16383_i16);
    let rhs = simd::i16x8_splat(16384_i16);
    let result = simd::i32x4_dot_i16x8_s(lhs, rhs);
    // Each result lane sums two products: 2 * 16383 * 16384.
    let expected = simd::i32x4_splat(536838144_i32);
    assert_eq!(result, expected);
}
1803
/// Checks that `v128_or` of all-zero and all-one lanes yields all-one lanes.
#[test]
fn v128_or_works() {
    let zeros = simd::i16x8_splat(0);
    let ones = simd::i16x8_splat(0xffff_u16 as i16);
    let result = simd::v128_or(zeros, ones);
    let expected = simd::i16x8_splat(0xffff_u16 as i16);
    assert_eq!(result, expected);
}
1811
/// Checks that `i8x16_narrow_i16x8_s` turns `0x80` lanes into `0x7f` lanes.
#[test]
fn i8x16_narrow_i16x8_s_works() {
    let lhs = simd::i16x8_splat(0x80_i16);
    let rhs = simd::i16x8_splat(0x80_i16);
    let result = simd::i8x16_narrow_i16x8_s(lhs, rhs);
    let expected = simd::i8x16_splat(0x7f);
    assert_eq!(result, expected);
}