//! Winch code generator: shared macro-assembler (MASM) definitions.
//! (File: winch_codegen/masm.rs)
1use crate::Result;
2use crate::abi::{self, LocalSlot, align_to};
3use crate::codegen::{CodeGenContext, Emission, FuncEnv};
4use crate::isa::{
5    CallingConvention,
6    reg::{Reg, RegClass, WritableReg, writable},
7};
8use cranelift_codegen::{
9    Final, MachBufferFinalized, MachLabel,
10    binemit::CodeOffset,
11    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
12};
13use std::{fmt::Debug, ops::Range};
14use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};
15
16pub(crate) use cranelift_codegen::ir::TrapCode;
17
/// Signedness of a WebAssembly integer division operation.
#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}
25
/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    /// `memory.atomic.wait32`: wait on a 32-bit value.
    Wait32,
    /// `memory.atomic.wait64`: wait on a 64-bit value.
    Wait64,
}
32
/// Signedness of a remainder (modulo) operation.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    /// Returns `true` when this is the signed remainder kind.
    pub fn is_signed(&self) -> bool {
        match self {
            RemKind::Signed => true,
            RemKind::Unsigned => false,
        }
    }
}
47
/// Kinds of vector min operation supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}
67
68impl V128MinKind {
69    /// The size of each lane.
70    pub(crate) fn lane_size(&self) -> OperandSize {
71        match self {
72            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
73            Self::F64x2 => OperandSize::S64,
74            Self::I8x16S | Self::I8x16U => OperandSize::S8,
75            Self::I16x8S | Self::I16x8U => OperandSize::S16,
76        }
77    }
78}
79
/// Kinds of vector max operation supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}
99
100impl V128MaxKind {
101    /// The size of each lane.
102    pub(crate) fn lane_size(&self) -> OperandSize {
103        match self {
104            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
105            Self::F64x2 => OperandSize::S64,
106            Self::I8x16S | Self::I8x16U => OperandSize::S8,
107            Self::I16x8S | Self::I16x8U => OperandSize::S16,
108        }
109    }
110}
111
/// Signedness of a wide multiplication operation.
#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    /// Treat the operands as signed.
    Signed,
    /// Treat the operands as unsigned.
    Unsigned,
}
117
/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    /// Add the operand to the current value.
    Add,
    /// Subtract the operand from the current value.
    Sub,
    /// Exchange the operand with the current value.
    Xchg,
    /// Bitwise AND the operand with the current value.
    And,
    /// Bitwise OR the operand with the current value.
    Or,
    /// Bitwise XOR the operand with the current value.
    Xor,
}
127
/// The direction to perform the memory move.
///
/// The invariants documented on each variant relate the source and
/// destination locations to the frame pointer (FP) and stack pointer (SP).
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}
140
/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns `true` if the truncation kind is [`Checked`].
    ///
    /// [`Checked`]: TruncKind::Checked
    pub(crate) fn is_checked(&self) -> bool {
        matches!(self, TruncKind::Checked)
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        *self == TruncKind::Unchecked
    }
}
167
/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    /// Wraps a raw byte offset relative to the stack pointer.
    pub fn from_u32(offs: u32) -> Self {
        SPOffset(offs)
    }

    /// The raw byte offset.
    pub fn as_u32(&self) -> u32 {
        let SPOffset(raw) = self;
        *raw
    }
}
181
182/// A stack slot.
183#[derive(Debug, Clone, Copy, Eq, PartialEq)]
184pub struct StackSlot {
185    /// The location of the slot, relative to the stack pointer.
186    pub offset: SPOffset,
187    /// The size of the slot, in bytes.
188    pub size: u32,
189}
190
191impl StackSlot {
192    pub fn new(offs: SPOffset, size: u32) -> Self {
193        Self { offset: offs, size }
194    }
195}
196
/// Type-level marker for a class of scratch register, implemented by
/// [`IntScratch`] and [`FloatScratch`].
pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}
201
/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    // Integer scratch registers belong to the integer register class.
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    // Float scratch registers belong to the float register class.
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}
218
219/// A scratch register scope.
220#[derive(Debug, Clone, Copy)]
221pub struct Scratch(Reg);
222
223impl Scratch {
224    pub fn new(r: Reg) -> Self {
225        Self(r)
226    }
227
228    #[inline]
229    pub fn inner(&self) -> Reg {
230        self.0
231    }
232
233    #[inline]
234    pub fn writable(&self) -> WritableReg {
235        writable!(self.0)
236    }
237}
238
/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
///
/// Suffixes: `S` = signed comparison, `U` = unsigned comparison.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}
265
/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering code.
///
/// Unlike [`IntCmpKind`], there is no signed/unsigned distinction here.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}
284
/// Kinds of shifts in WebAssembly. The [`masm`] implementation for each ISA is
/// responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
///
/// Rotates are grouped here alongside shifts.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}
301
/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    /// A sign-extending extend.
    Signed(Extend<Signed>),
    /// A zero-extending extend.
    Unsigned(Extend<Zero>),
}
310
/// Uninhabited type-level marker for sign extension.
#[derive(Copy, Clone)]
pub(crate) enum Signed {}
/// Uninhabited type-level marker for zero extension.
#[derive(Copy, Clone)]
pub(crate) enum Zero {}
315
/// Marker trait implemented by [`Signed`] and [`Zero`] to parameterize
/// [`Extend`] over the kind of extension performed.
pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}
320
/// An integer extension, parameterized by its signedness marker `T`
/// ([`Signed`] or [`Zero`]).
#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`, that are empty enums, which means that this variant cannot be
    /// constructed.
    __Kind(T),
}
340
impl From<Extend<Zero>> for ExtendKind {
    /// Wraps a zero-extend into the type-erased [`ExtendKind`].
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}
346
347impl<T: ExtendType> Extend<T> {
348    pub fn from_size(&self) -> OperandSize {
349        match self {
350            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
351            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
352            Extend::I64Extend32 => OperandSize::S32,
353            Extend::__Kind(_) => unreachable!(),
354        }
355    }
356
357    pub fn to_size(&self) -> OperandSize {
358        match self {
359            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
360            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
361            Extend::__Kind(_) => unreachable!(),
362        }
363    }
364
365    pub fn from_bits(&self) -> u8 {
366        self.from_size().num_bits()
367    }
368
369    pub fn to_bits(&self) -> u8 {
370        self.to_size().num_bits()
371    }
372}
373
impl From<Extend<Signed>> for ExtendKind {
    /// Wraps a sign-extend into the type-erased [`ExtendKind`].
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}
379
380impl ExtendKind {
381    pub fn signed(&self) -> bool {
382        match self {
383            Self::Signed(_) => true,
384            _ => false,
385        }
386    }
387
388    pub fn from_bits(&self) -> u8 {
389        match self {
390            Self::Signed(s) => s.from_bits(),
391            Self::Unsigned(u) => u.from_bits(),
392        }
393    }
394
395    pub fn to_bits(&self) -> u8 {
396        match self {
397            Self::Signed(s) => s.to_bits(),
398            Self::Unsigned(u) => u.to_bits(),
399        }
400    }
401}
402
/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
///
/// The `S`/`U` suffix denotes sign/zero extension of the loaded lanes.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}
421
/// Kinds of splat loads supported by WebAssembly.
///
/// Each variant is the width of the scalar that is loaded before being
/// splatted.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}
433
/// Kinds of splat supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}
450
451impl SplatKind {
452    /// The lane size to use for different kinds of splats.
453    pub(crate) fn lane_size(&self) -> OperandSize {
454        match self {
455            SplatKind::I8x16 => OperandSize::S8,
456            SplatKind::I16x8 => OperandSize::S16,
457            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
458            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
459        }
460    }
461}
462
/// Kinds of extract lane supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}
483
484impl ExtractLaneKind {
485    /// The lane size to use for different kinds of extract lane kinds.
486    pub(crate) fn lane_size(&self) -> OperandSize {
487        match self {
488            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
489            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
490            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
491            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
492        }
493    }
494}
495
impl From<ExtractLaneKind> for Extend<Signed> {
    /// Maps a sign-extending integer lane extraction to the equivalent
    /// scalar sign extend.
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            // NOTE(review): all remaining kinds (zero-extending, 32/64-bit
            // and float lanes) panic via `unimplemented!` — presumably no
            // caller converts them; confirm before relying on this path.
            _ => unimplemented!(),
        }
    }
}
505
/// Kinds of replace lane supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}
521
522impl ReplaceLaneKind {
523    /// The lane size to use for different kinds of replace lane kinds.
524    pub(crate) fn lane_size(&self) -> OperandSize {
525        match self {
526            ReplaceLaneKind::I8x16 => OperandSize::S8,
527            ReplaceLaneKind::I16x8 => OperandSize::S16,
528            ReplaceLaneKind::I32x4 => OperandSize::S32,
529            ReplaceLaneKind::I64x2 => OperandSize::S64,
530            ReplaceLaneKind::F32x4 => OperandSize::S32,
531            ReplaceLaneKind::F64x2 => OperandSize::S64,
532        }
533    }
534}
535
/// Kinds of behavior supported by Wasm loads.
///
/// See [`Self::derive_operand_size`] for the memory access width implied by
/// each kind.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into select lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}
554
555impl LoadKind {
556    /// Returns the [`OperandSize`] used in the load operation.
557    pub(crate) fn derive_operand_size(&self) -> OperandSize {
558        match self {
559            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
560                Self::operand_size_for_scalar(extend)
561            }
562            Self::VectorExtend(_) => OperandSize::S64,
563            Self::Splat(kind) => Self::operand_size_for_splat(kind),
564            Self::Operand(size)
565            | Self::Atomic(size, None)
566            | Self::VectorLane(LaneSelector { size, .. })
567            | Self::VectorZero(size) => *size,
568        }
569    }
570
571    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
572        Self::VectorLane(LaneSelector { lane, size })
573    }
574
575    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
576        match extend_kind {
577            ExtendKind::Signed(s) => s.from_size(),
578            ExtendKind::Unsigned(u) => u.from_size(),
579        }
580    }
581
582    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
583        match kind {
584            SplatLoadKind::S8 => OperandSize::S8,
585            SplatLoadKind::S16 => OperandSize::S16,
586            SplatLoadKind::S32 => OperandSize::S32,
587            SplatLoadKind::S64 => OperandSize::S64,
588        }
589    }
590
591    pub(crate) fn is_atomic(&self) -> bool {
592        matches!(self, Self::Atomic(_, _))
593    }
594}
595
/// Kinds of behavior supported by Wasm stores.
// NOTE: the original header said "loads" — a copy-paste of the doc above
// `LoadKind`; these variants describe stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}
606
impl StoreKind {
    /// Builds a [`StoreKind::VectorLane`] from a lane index and size.
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}
612
/// Selects a single lane of a vector by index and lane width.
#[derive(Copy, Clone)]
pub struct LaneSelector {
    /// The index of the lane.
    pub lane: u8,
    /// The width of the lane.
    pub size: OperandSize,
}
618
/// Types of vector integer to float conversions supported by WebAssembly.
///
/// Source and destination lane widths are available via
/// [`Self::src_lane_size`] and [`Self::dst_lane_size`].
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}
632
633impl V128ConvertKind {
634    pub(crate) fn src_lane_size(&self) -> OperandSize {
635        match self {
636            V128ConvertKind::I32x4S
637            | V128ConvertKind::I32x4U
638            | V128ConvertKind::I32x4LowS
639            | V128ConvertKind::I32x4LowU => OperandSize::S32,
640        }
641    }
642
643    pub(crate) fn dst_lane_size(&self) -> OperandSize {
644        match self {
645            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
646            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
647        }
648    }
649}
650
/// Kinds of vector narrowing operations supported by WebAssembly.
///
/// Destination lane widths are available via [`Self::dst_lane_size`].
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}
666
667impl V128NarrowKind {
668    /// Return the size of the destination lanes.
669    pub(crate) fn dst_lane_size(&self) -> OperandSize {
670        match self {
671            Self::I16x8S | Self::I16x8U => OperandSize::S8,
672            Self::I32x4S | Self::I32x4U => OperandSize::S16,
673        }
674    }
675}
676
/// Kinds of vector extending operations supported by WebAssembly.
///
/// `Low`/`High` refers to which half of the source vector's lanes is
/// extended; `S`/`U` to sign/zero extension.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}
705
706impl V128ExtendKind {
707    /// The size of the source's lanes.
708    pub(crate) fn src_lane_size(&self) -> OperandSize {
709        match self {
710            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
711                OperandSize::S8
712            }
713            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
714                OperandSize::S16
715            }
716            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
717                OperandSize::S32
718            }
719        }
720    }
721}
722
/// Kinds of vector equalities and non-equalities supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}
738
739impl VectorEqualityKind {
740    /// Get the lane size to use.
741    pub(crate) fn lane_size(&self) -> OperandSize {
742        match self {
743            Self::I8x16 => OperandSize::S8,
744            Self::I16x8 => OperandSize::S16,
745            Self::I32x4 | Self::F32x4 => OperandSize::S32,
746            Self::I64x2 | Self::F64x2 => OperandSize::S64,
747        }
748    }
749}
750
/// Kinds of vector comparisons supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}
772
773impl VectorCompareKind {
774    /// Get the lane size to use.
775    pub(crate) fn lane_size(&self) -> OperandSize {
776        match self {
777            Self::I8x16S | Self::I8x16U => OperandSize::S8,
778            Self::I16x8S | Self::I16x8U => OperandSize::S16,
779            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
780            Self::I64x2S | Self::F64x2 => OperandSize::S64,
781        }
782    }
783}
784
/// Kinds of vector absolute operations supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}
801
802impl V128AbsKind {
803    /// The lane size to use.
804    pub(crate) fn lane_size(&self) -> OperandSize {
805        match self {
806            Self::I8x16 => OperandSize::S8,
807            Self::I16x8 => OperandSize::S16,
808            Self::I32x4 | Self::F32x4 => OperandSize::S32,
809            Self::I64x2 | Self::F64x2 => OperandSize::S64,
810        }
811    }
812}
813
/// Kinds of truncation for vectors supported by WebAssembly.
///
/// Source and destination lane widths are available via
/// [`Self::src_lane_size`] and [`Self::dst_lane_size`].
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}
829
830impl V128TruncKind {
831    /// The size of the source lanes.
832    pub(crate) fn src_lane_size(&self) -> OperandSize {
833        match self {
834            V128TruncKind::F32x4
835            | V128TruncKind::I32x4FromF32x4S
836            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
837            V128TruncKind::F64x2
838            | V128TruncKind::I32x4FromF64x2SZero
839            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
840        }
841    }
842
843    /// The size of the destination lanes.
844    pub(crate) fn dst_lane_size(&self) -> OperandSize {
845        if let V128TruncKind::F64x2 = self {
846            OperandSize::S64
847        } else {
848            OperandSize::S32
849        }
850    }
851}
852
/// Kinds of vector addition supported by WebAssembly.
///
/// Each integer variant documents whether lanes wrap or saturate on
/// overflow.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}
876
/// Kinds of vector subtraction supported by WebAssembly.
///
/// Each integer variant documents whether lanes wrap or saturate on
/// overflow.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}
900
impl From<V128NegKind> for V128SubKind {
    /// Maps an integer negation kind to the wrapping subtraction kind with
    /// the same lane shape.
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            // NOTE(review): the float kinds panic via `unimplemented!` —
            // presumably float negation is not lowered through subtraction;
            // confirm at the call sites.
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}
912
/// Kinds of vector multiplication supported by WebAssembly.
///
/// Identifies the lane shape of the multiplication.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}
926
/// Kinds of vector negation supported by WebAssembly.
///
/// Lane widths are available via [`Self::lane_size`].
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}
943
944impl V128NegKind {
945    /// The size of the lanes.
946    pub(crate) fn lane_size(&self) -> OperandSize {
947        match self {
948            Self::F32x4 | Self::I32x4 => OperandSize::S32,
949            Self::F64x2 | Self::I64x2 => OperandSize::S64,
950            Self::I8x16 => OperandSize::S8,
951            Self::I16x8 => OperandSize::S16,
952        }
953    }
954}
955
/// Kinds of extended pairwise addition supported by WebAssembly.
///
/// The `S`/`U` suffix denotes sign/zero extension of the source lanes.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}
967
/// Kinds of vector extended multiplication supported by WebAssembly.
///
/// `Low`/`High` refers to which half of the source lanes is used; `S`/`U`
/// to sign/zero extension.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    /// Low half of i8x16, sign extended.
    LowI8x16S,
    /// High half of i8x16, sign extended.
    HighI8x16S,
    /// Low half of i8x16, zero extended.
    LowI8x16U,
    /// High half of i8x16, zero extended.
    HighI8x16U,
    /// Low half of i16x8, sign extended.
    LowI16x8S,
    /// High half of i16x8, sign extended.
    HighI16x8S,
    /// Low half of i16x8, zero extended.
    LowI16x8U,
    /// High half of i16x8, zero extended.
    HighI16x8U,
    /// Low half of i32x4, sign extended.
    LowI32x4S,
    /// High half of i32x4, sign extended.
    HighI32x4S,
    /// Low half of i32x4, zero extended.
    LowI32x4U,
    /// High half of i32x4, zero extended.
    HighI32x4U,
}
984
impl From<V128ExtMulKind> for V128ExtendKind {
    /// Maps an extended-multiply kind to the extend with the same half,
    /// lane shape and signedness (a one-to-one variant mapping).
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}
1003
impl From<V128ExtMulKind> for V128MulKind {
    /// Maps an extended-multiply kind to the multiplication whose lanes are
    /// twice as wide as the source lanes (e.g. i8x16 sources multiply as
    /// i16x8).
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}
1022
/// Operand size, in bits.
///
/// See [`Self::num_bits`] and [`Self::bytes`] for numeric conversions.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}
1037
1038impl OperandSize {
1039    /// The number of bits in the operand.
1040    pub fn num_bits(&self) -> u8 {
1041        match self {
1042            OperandSize::S8 => 8,
1043            OperandSize::S16 => 16,
1044            OperandSize::S32 => 32,
1045            OperandSize::S64 => 64,
1046            OperandSize::S128 => 128,
1047        }
1048    }
1049
1050    /// The number of bytes in the operand.
1051    pub fn bytes(&self) -> u32 {
1052        match self {
1053            Self::S8 => 1,
1054            Self::S16 => 2,
1055            Self::S32 => 4,
1056            Self::S64 => 8,
1057            Self::S128 => 16,
1058        }
1059    }
1060
1061    /// The binary logarithm of the number of bits in the operand.
1062    pub fn log2(&self) -> u8 {
1063        match self {
1064            OperandSize::S8 => 3,
1065            OperandSize::S16 => 4,
1066            OperandSize::S32 => 5,
1067            OperandSize::S64 => 6,
1068            OperandSize::S128 => 7,
1069        }
1070    }
1071
1072    /// Create an [`OperandSize`]  from the given number of bytes.
1073    pub fn from_bytes(bytes: u8) -> Self {
1074        use OperandSize::*;
1075        match bytes {
1076            4 => S32,
1077            8 => S64,
1078            16 => S128,
1079            _ => panic!("Invalid bytes {bytes} for OperandSize"),
1080        }
1081    }
1082
1083    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
1084        match to {
1085            OperandSize::S32 => match self {
1086                OperandSize::S8 => Some(Extend::I32Extend8),
1087                OperandSize::S16 => Some(Extend::I32Extend16),
1088                _ => None,
1089            },
1090            OperandSize::S64 => match self {
1091                OperandSize::S8 => Some(Extend::I64Extend8),
1092                OperandSize::S16 => Some(Extend::I64Extend16),
1093                OperandSize::S32 => Some(Extend::I64Extend32),
1094                _ => None,
1095            },
1096            _ => None,
1097        }
1098    }
1099
1100    /// The number of bits in the mantissa.
1101    ///
1102    /// Only implemented for floats.
1103    pub fn mantissa_bits(&self) -> u8 {
1104        match self {
1105            Self::S32 => 8,
1106            Self::S64 => 11,
1107            _ => unimplemented!(),
1108        }
1109    }
1110}
1111
/// An abstraction over a register or immediate.
///
/// Many operations accept either a register or an immediate as their source
/// operand; this type lets callers pass either one uniformly.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}
1120
/// A tagged representation of an immediate.
///
/// Floating point immediates are stored as their raw bit patterns; see
/// [`Imm::f32`] and [`Imm::f64`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate, stored as its unsigned bit pattern.
    I32(u32),
    /// I64 immediate, stored as its unsigned bit pattern.
    I64(u64),
    /// F32 immediate, stored as its raw bits.
    F32(u32),
    /// F64 immediate, stored as its raw bits.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}
1135
1136impl Imm {
1137    /// Create a new I64 immediate.
1138    pub fn i64(val: i64) -> Self {
1139        Self::I64(val as u64)
1140    }
1141
1142    /// Create a new I32 immediate.
1143    pub fn i32(val: i32) -> Self {
1144        Self::I32(val as u32)
1145    }
1146
1147    /// Create a new F32 immediate.
1148    pub fn f32(bits: u32) -> Self {
1149        Self::F32(bits)
1150    }
1151
1152    /// Create a new F64 immediate.
1153    pub fn f64(bits: u64) -> Self {
1154        Self::F64(bits)
1155    }
1156
1157    /// Create a new V128 immediate.
1158    pub fn v128(bits: i128) -> Self {
1159        Self::V128(bits)
1160    }
1161
1162    /// Convert the immediate to i32, if possible.
1163    pub fn to_i32(&self) -> Option<i32> {
1164        match self {
1165            Self::I32(v) => Some(*v as i32),
1166            Self::I64(v) => i32::try_from(*v as i64).ok(),
1167            _ => None,
1168        }
1169    }
1170
1171    /// Unwraps the underlying integer value as u64.
1172    /// # Panics
1173    /// This function panics if the underlying value can't be represented
1174    /// as u64.
1175    pub fn unwrap_as_u64(&self) -> u64 {
1176        match self {
1177            Self::I32(v) => *v as u64,
1178            Self::I64(v) => *v,
1179            Self::F32(v) => *v as u64,
1180            Self::F64(v) => *v,
1181            _ => unreachable!(),
1182        }
1183    }
1184
1185    /// Get the operand size of the immediate.
1186    pub fn size(&self) -> OperandSize {
1187        match self {
1188            Self::I32(_) | Self::F32(_) => OperandSize::S32,
1189            Self::I64(_) | Self::F64(_) => OperandSize::S64,
1190            Self::V128(_) => OperandSize::S128,
1191        }
1192    }
1193
1194    /// Get a little endian representation of the immediate.
1195    ///
1196    /// This method heap allocates and is intended to be used when adding
1197    /// values to the constant pool.
1198    pub fn to_bytes(&self) -> Vec<u8> {
1199        match self {
1200            Imm::I32(n) => n.to_le_bytes().to_vec(),
1201            Imm::I64(n) => n.to_le_bytes().to_vec(),
1202            Imm::F32(n) => n.to_le_bytes().to_vec(),
1203            Imm::F64(n) => n.to_le_bytes().to_vec(),
1204            Imm::V128(n) => n.to_le_bytes().to_vec(),
1205        }
1206    }
1207}
1208
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}
1220
/// The maximum number of context arguments currently used across the
/// compiler; see [ContextArgs] for the combinations that may be passed.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;
1223
/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}
1245
1246impl ContextArgs {
1247    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
1248    /// register as both the caller and callee context arguments.
1249    pub fn pinned_callee_and_caller_vmctx() -> Self {
1250        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
1251    }
1252
1253    /// Construct a [ContextArgs] that declares the usage of the pinned
1254    /// [VMContext] register as the only context argument.
1255    pub fn pinned_vmctx() -> Self {
1256        Self::VMContext([VMContextLoc::Pinned])
1257    }
1258
1259    /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
1260    /// indirectly from the pinned [VMContext] register as the only context
1261    /// argument.
1262    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
1263        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
1264    }
1265
1266    /// Construct a [ContextArgs] that declares a dynamic callee context and the
1267    /// pinned [VMContext] register as the context arguments.
1268    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
1269        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
1270    }
1271
1272    /// Get the length of the [ContextArgs].
1273    pub fn len(&self) -> usize {
1274        self.as_slice().len()
1275    }
1276
1277    /// Get a slice of the context arguments.
1278    pub fn as_slice(&self) -> &[VMContextLoc] {
1279        match self {
1280            Self::VMContext(a) => a.as_slice(),
1281            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
1282        }
1283    }
1284}
1285
/// The kind of callee targeted by a function call.
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}
1293
impl CalleeKind {
    /// Creates an indirect callee kind from the register holding the callee's
    /// raw address.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}
1305
1306impl RegImm {
1307    /// Register constructor.
1308    pub fn reg(r: Reg) -> Self {
1309        RegImm::Reg(r)
1310    }
1311
1312    /// I64 immediate constructor.
1313    pub fn i64(val: i64) -> Self {
1314        RegImm::Imm(Imm::i64(val))
1315    }
1316
1317    /// I32 immediate constructor.
1318    pub fn i32(val: i32) -> Self {
1319        RegImm::Imm(Imm::i32(val))
1320    }
1321
1322    /// F32 immediate, stored using its bits representation.
1323    pub fn f32(bits: u32) -> Self {
1324        RegImm::Imm(Imm::f32(bits))
1325    }
1326
1327    /// F64 immediate, stored using its bits representation.
1328    pub fn f64(bits: u64) -> Self {
1329        RegImm::Imm(Imm::f64(bits))
1330    }
1331
1332    /// V128 immediate.
1333    pub fn v128(bits: i128) -> Self {
1334        RegImm::Imm(Imm::v128(bits))
1335    }
1336}
1337
impl From<Reg> for RegImm {
    /// A register converts directly into the [RegImm::Reg] variant.
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}
1343
/// Rounding modes for floating point rounding operations.
// NOTE(review): variant semantics inferred from names (Nearest ≈
// round-to-nearest, Up ≈ ceil, Down ≈ floor, Zero ≈ trunc) — confirm
// against each backend's lowering.
#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}
1351
/// Memory flags for trusted loads/stores.
///
/// Trusted accesses are emitted by the compiler itself (as opposed to being
/// derived from Wasm memory operations), so they can be marked as
/// non-trapping.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly
/// (little endian), regardless of the host's native endianness.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
1359
1360/// Generic MacroAssembler interface used by the code generation.
1361///
1362/// The MacroAssembler trait aims to expose an interface, high-level enough,
1363/// so that each ISA can provide its own lowering to machine code. For example,
1364/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
1366/// operator, allowing each implementation to lower such operator entirely.
1367/// This approach attributes more responsibility to the MacroAssembler, but frees
1368/// the caller from concerning about assembling the right sequence of
1369/// instructions at the operator callsite.
1370///
1371/// The interface defaults to a three-argument form for binary operations;
1372/// this allows a natural mapping to instructions for RISC architectures,
1373/// that use three-argument form.
1374/// This approach allows for a more general interface that can be restricted
1375/// where needed, in the case of architectures that use a two-argument form.
1376
1377pub(crate) trait MacroAssembler {
1378    /// The addressing mode.
1379    type Address: Copy + Debug;
1380
1381    /// The pointer representation of the target ISA,
1382    /// used to access information from [`VMOffsets`].
1383    type Ptr: PtrSize;
1384
1385    /// The ABI details of the target.
1386    type ABI: abi::ABI;
1387
    /// Emit the function prologue.
    ///
    /// The default implementation emits the frame setup sequence followed by
    /// a stack check that uses `vmctx` (see [Self::check_stack]).
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }
1393
1394    /// Generate the frame setup sequence.
1395    fn frame_setup(&mut self) -> Result<()>;
1396
1397    /// Generate the frame restore sequence.
1398    fn frame_restore(&mut self) -> Result<()>;
1399
1400    /// Emit a stack check.
1401    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;
1402
    /// Emit the function epilogue.
    ///
    /// The default implementation only emits the frame restore sequence; see
    /// [Self::frame_restore].
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }
1407
1408    /// Reserve stack space.
1409    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;
1410
1411    /// Free stack space.
1412    fn free_stack(&mut self, bytes: u32) -> Result<()>;
1413
1414    /// Reset the stack pointer to the given offset;
1415    ///
1416    /// Used to reset the stack pointer to a given offset
1417    /// when dealing with unreachable code.
1418    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;
1419
1420    /// Get the address of a local slot.
1421    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;
1422
1423    /// Constructs an address with an offset that is relative to the
1424    /// current position of the stack pointer (e.g. [sp + (sp_offset -
    /// offset)]).
1426    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1427
1428    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. [sp + offset]).
1430    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1431
1432    /// Alias for [`Self::address_at_reg`] using the VMContext register as
1433    /// a base. The VMContext register is derived from the ABI type that is
1434    /// associated to the MacroAssembler.
1435    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;
1436
1437    /// Construct an address that is absolute to the current position
1438    /// of the given register.
1439    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;
1440
1441    /// Emit a function call to either a local or external function.
1442    fn call(
1443        &mut self,
1444        stack_args_size: u32,
1445        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
1446    ) -> Result<u32>;
1447
1448    /// Acquire a scratch register and execute the given callback.
1449    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;
1450
    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    ///
    /// Integers and function references use an integer scratch register;
    /// floats and vectors use a floating point scratch register. Any other
    /// type is currently unsupported.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            // Integer-class values; funcrefs are handled with the integer
            // register class as well.
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            // Float-class values; v128 shares the float register class.
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            // Other reference types (e.g. non-func heap types) are not
            // supported here.
            _ => unimplemented!(),
        }
    }
1471
1472    /// Get stack pointer offset.
1473    fn sp_offset(&self) -> Result<SPOffset>;
1474
1475    /// Perform a stack store.
1476    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;
1477
1478    /// Alias for `MacroAssembler::store` with the operand size corresponding
1479    /// to the pointer size of the target.
1480    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;
1481
1482    /// Perform a WebAssembly store.
1483    /// A WebAssembly store introduces several additional invariants compared to
1484    /// [Self::store], more precisely, it can implicitly trap, in certain
1485    /// circumstances, even if explicit bounds checks are elided, in that sense,
1486    /// we consider this type of load as untrusted. It can also differ with
1487    /// regards to the endianness depending on the target ISA. For this reason,
1488    /// [Self::wasm_store], should be explicitly used when emitting WebAssembly
1489    /// stores.
1490    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;
1491
1492    /// Perform a zero-extended stack load.
1493    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
1494
1495    /// Perform a WebAssembly load.
1496    /// A WebAssembly load introduces several additional invariants compared to
1497    /// [Self::load], more precisely, it can implicitly trap, in certain
1498    /// circumstances, even if explicit bounds checks are elided, in that sense,
1499    /// we consider this type of load as untrusted. It can also differ with
1500    /// regards to the endianness depending on the target ISA. For this reason,
1501    /// [Self::wasm_load], should be explicitly used when emitting WebAssembly
1502    /// loads.
1503    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;
1504
1505    /// Alias for `MacroAssembler::load` with the operand size corresponding
1506    /// to the pointer size of the target.
1507    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;
1508
1509    /// Computes the effective address and stores the result in the destination
1510    /// register.
1511    fn compute_addr(
1512        &mut self,
1513        _src: Self::Address,
1514        _dst: WritableReg,
1515        _size: OperandSize,
1516    ) -> Result<()>;
1517
1518    /// Pop a value from the machine stack into the given register.
1519    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1520
1521    /// Perform a move.
1522    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;
1523
1524    /// Perform a conditional move.
1525    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
1526    -> Result<()>;
1527
    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    ///
    /// Both `src` and `dst` are offsets relative to the stack pointer (see
    /// [SPOffset]); `bytes` must be a multiple of 4. Data is copied one
    /// machine word at a time through an integer scratch register, with a
    /// final half-word copy when `bytes` is not a multiple of the word size.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        // The direction encodes the expected relative placement of the two
        // regions; it selects the iteration order below so that overlapping
        // regions are copied safely.
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                // Bias both offsets down by `bytes` up front; they are
                // incremented before each copy. The tail copy below
                // compensates for this bias.
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        // Bounce each word through the scratch register.
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        // Post-decrement: the offsets point at the word just
                        // copied; step to the next one.
                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            // Copy the 4-byte tail, which can only remain when the word size
            // is 8 bytes and `bytes` is aligned to 4 but not 8.
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                wasmtime_environ::error::Ok(())
            })?;
        }
        Ok(())
    }
1623
1624    /// Perform add operation.
1625    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1626
1627    /// Perform a checked unsigned integer addition, emitting the provided trap
1628    /// if the addition overflows.
1629    fn checked_uadd(
1630        &mut self,
1631        dst: WritableReg,
1632        lhs: Reg,
1633        rhs: RegImm,
1634        size: OperandSize,
1635        trap: TrapCode,
1636    ) -> Result<()>;
1637
1638    /// Perform subtraction operation.
1639    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1640
1641    /// Perform multiplication operation.
1642    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1643
1644    /// Perform a floating point add operation.
1645    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1646
1647    /// Perform a floating point subtraction operation.
1648    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1649
1650    /// Perform a floating point multiply operation.
1651    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1652
1653    /// Perform a floating point divide operation.
1654    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1655
1656    /// Perform a floating point minimum operation. In x86, this will emit
1657    /// multiple instructions.
1658    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1659
1660    /// Perform a floating point maximum operation. In x86, this will emit
1661    /// multiple instructions.
1662    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1663
1664    /// Perform a floating point copysign operation. In x86, this will emit
1665    /// multiple instructions.
1666    fn float_copysign(
1667        &mut self,
1668        dst: WritableReg,
1669        lhs: Reg,
1670        rhs: Reg,
1671        size: OperandSize,
1672    ) -> Result<()>;
1673
1674    /// Perform a floating point abs operation.
1675    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1676
1677    /// Perform a floating point negation operation.
1678    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1679
1680    /// Perform a floating point floor operation.
1681    fn float_round<
1682        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
1683    >(
1684        &mut self,
1685        mode: RoundingMode,
1686        env: &mut FuncEnv<Self::Ptr>,
1687        context: &mut CodeGenContext<Emission>,
1688        size: OperandSize,
1689        fallback: F,
1690    ) -> Result<()>;
1691
1692    /// Perform a floating point square root operation.
1693    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1694
1695    /// Perform logical and operation.
1696    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1697
1698    /// Perform logical or operation.
1699    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1700
1701    /// Perform logical exclusive or operation.
1702    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1703
1704    /// Perform a shift operation between a register and an immediate.
1705    fn shift_ir(
1706        &mut self,
1707        dst: WritableReg,
1708        imm: Imm,
1709        lhs: Reg,
1710        kind: ShiftKind,
1711        size: OperandSize,
1712    ) -> Result<()>;
1713
1714    /// Perform a shift operation between two registers.
1715    /// This case is special in that some architectures have specific expectations
1716    /// regarding the location of the instruction arguments. To free the
1717    /// caller from having to deal with the architecture specific constraints
1718    /// we give this function access to the code generation context, allowing
1719    /// each implementation to decide the lowering path.
1720    fn shift(
1721        &mut self,
1722        context: &mut CodeGenContext<Emission>,
1723        kind: ShiftKind,
1724        size: OperandSize,
1725    ) -> Result<()>;
1726
1727    /// Perform division operation.
1728    /// Division is special in that some architectures have specific
1729    /// expectations regarding the location of the instruction
1730    /// arguments and regarding the location of the quotient /
1731    /// remainder. To free the caller from having to deal with the
1732    /// architecture specific constraints we give this function access
1733    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
1735    /// unconstrained binary operation, the caller can decide to use
1736    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
1737    /// functions.
1738    fn div(
1739        &mut self,
1740        context: &mut CodeGenContext<Emission>,
1741        kind: DivKind,
1742        size: OperandSize,
1743    ) -> Result<()>;
1744
1745    /// Calculate remainder.
1746    fn rem(
1747        &mut self,
1748        context: &mut CodeGenContext<Emission>,
1749        kind: RemKind,
1750        size: OperandSize,
1751    ) -> Result<()>;
1752
1753    /// Compares `src1` against `src2` for the side effect of setting processor
1754    /// flags.
1755    ///
1756    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1757    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1758    /// `src2 == b`
1759    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1760
1761    /// Compare src and dst and put the result in dst.
1762    /// This function will potentially emit a series of instructions.
1763    ///
1764    /// The initial value in `dst` is the left-hand-side of the comparison and
1765    /// the initial value in `src` is the right-hand-side of the comparison.
1766    /// That means for `a < b` then `dst == a` and `src == b`.
1767    fn cmp_with_set(
1768        &mut self,
1769        dst: WritableReg,
1770        src: RegImm,
1771        kind: IntCmpKind,
1772        size: OperandSize,
1773    ) -> Result<()>;
1774
1775    /// Compare floats in src1 and src2 and put the result in dst.
1776    /// In x86, this will emit multiple instructions.
1777    fn float_cmp_with_set(
1778        &mut self,
1779        dst: WritableReg,
1780        src1: Reg,
1781        src2: Reg,
1782        kind: FloatCmpKind,
1783        size: OperandSize,
1784    ) -> Result<()>;
1785
1786    /// Count the number of leading zeroes in src and put the result in dst.
1787    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1788    /// false.
1789    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1790
    /// Count the number of trailing zeroes in src and put the result in dst.
1792    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1793    /// false.
1794    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1795
1796    /// Push the register to the stack, returning the stack slot metadata.
1797    // NB
1798    // The stack alignment should not be assumed after any call to `push`,
1799    // unless explicitly aligned otherwise.  Typically, stack alignment is
1800    // maintained at call sites and during the execution of
1801    // epilogues.
1802    fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1803
1804    /// Finalize the assembly and return the result.
1805    fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1806
1807    /// Zero a particular register.
1808    fn zero(&mut self, reg: WritableReg) -> Result<()>;
1809
1810    /// Count the number of 1 bits in src and put the result in dst. In x64,
1811    /// this will emit multiple instructions if the `has_popcnt` flag is false.
1812    fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1813
1814    /// Converts an i64 to an i32 by discarding the high 32 bits.
1815    fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1816
1817    /// Extends an integer of a given size to a larger size.
1818    fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1819
1820    /// Emits one or more instructions to perform a signed truncation of a
1821    /// float into an integer.
1822    fn signed_truncate(
1823        &mut self,
1824        dst: WritableReg,
1825        src: Reg,
1826        src_size: OperandSize,
1827        dst_size: OperandSize,
1828        kind: TruncKind,
1829    ) -> Result<()>;
1830
1831    /// Emits one or more instructions to perform an unsigned truncation of a
1832    /// float into an integer.
1833    fn unsigned_truncate(
1834        &mut self,
1835        context: &mut CodeGenContext<Emission>,
1836        src_size: OperandSize,
1837        dst_size: OperandSize,
1838        kind: TruncKind,
1839    ) -> Result<()>;
1840
1841    /// Emits one or more instructions to perform a signed convert of an
1842    /// integer into a float.
1843    fn signed_convert(
1844        &mut self,
1845        dst: WritableReg,
1846        src: Reg,
1847        src_size: OperandSize,
1848        dst_size: OperandSize,
1849    ) -> Result<()>;
1850
1851    /// Emits one or more instructions to perform an unsigned convert of an
1852    /// integer into a float.
1853    fn unsigned_convert(
1854        &mut self,
1855        dst: WritableReg,
1856        src: Reg,
1857        tmp_gpr: Reg,
1858        src_size: OperandSize,
1859        dst_size: OperandSize,
1860    ) -> Result<()>;
1861
1862    /// Reinterpret a float as an integer.
1863    fn reinterpret_float_as_int(
1864        &mut self,
1865        dst: WritableReg,
1866        src: Reg,
1867        size: OperandSize,
1868    ) -> Result<()>;
1869
1870    /// Reinterpret an integer as a float.
1871    fn reinterpret_int_as_float(
1872        &mut self,
1873        dst: WritableReg,
1874        src: Reg,
1875        size: OperandSize,
1876    ) -> Result<()>;
1877
1878    /// Demote an f64 to an f32.
1879    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1880
1881    /// Promote an f32 to an f64.
1882    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1883
    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        // Target word size in bytes, as reported by the ABI.
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        // Nothing to do for an empty range.
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            // Zero the unaligned 4-byte prefix with a single 32-bit store.
            // NOTE(review): `LocalSlot::i32(start)` appears to address the
            // slot by its *end* offset, so this store covers
            // `mem.start..start` — confirm against LocalSlot's addressing
            // convention.
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        // Round the end of the range up to a word boundary and compute how
        // many word-sized slots remain to be zeroed.
        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            // Single slot: store an immediate zero directly, avoiding a
            // scratch register. Offsets are again the slot's end
            // (`start + word_size`).
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large amount of slots
            // this will be inefficient.
            //
            // Multiple slots: materialize zero in a scratch register once,
            // then unroll one store per word-sized slot.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                // Fully-qualified `Ok` pins the closure's error type for
                // inference.
                wasmtime_environ::error::Ok(())
            })?;
        }

        Ok(())
    }
1935
1936    /// Generate a label.
1937    fn get_label(&mut self) -> Result<MachLabel>;
1938
1939    /// Bind the given label at the current code offset.
1940    fn bind(&mut self, label: MachLabel) -> Result<()>;
1941
1942    /// Conditional branch.
1943    ///
1944    /// Performs a comparison between the two operands,
1945    /// and immediately after emits a jump to the given
1946    /// label destination if the condition is met.
1947    fn branch(
1948        &mut self,
1949        kind: IntCmpKind,
1950        lhs: Reg,
1951        rhs: RegImm,
1952        taken: MachLabel,
1953        size: OperandSize,
1954    ) -> Result<()>;
1955
    /// Emits an unconditional jump to the given label.
1957    fn jmp(&mut self, target: MachLabel) -> Result<()>;
1958
1959    /// Emits a jump table sequence. The default label is specified as
1960    /// the last element of the targets slice.
1961    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
1962
1963    /// Emit an unreachable code trap.
1964    fn unreachable(&mut self) -> Result<()>;
1965
1966    /// Emit an unconditional trap.
1967    fn trap(&mut self, code: TrapCode) -> Result<()>;
1968
1969    /// Traps if the condition code is met.
1970    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;
1971
1972    /// Trap if the source register is zero.
1973    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;
1974
1975    /// Ensures that the stack pointer is correctly positioned before an unconditional
1976    /// jump according to the requirements of the destination target.
1977    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
1978        let bytes = self
1979            .sp_offset()?
1980            .as_u32()
1981            .checked_sub(target.as_u32())
1982            .unwrap_or(0);
1983
1984        if bytes > 0 {
1985            self.free_stack(bytes)?;
1986        }
1987
1988        Ok(())
1989    }
1990
1991    /// Mark the start of a source location returning the machine code offset
1992    /// and the relative source code location.
1993    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;
1994
1995    /// Mark the end of a source location.
1996    fn end_source_loc(&mut self) -> Result<()>;
1997
1998    /// The current offset, in bytes from the beginning of the function.
1999    fn current_code_offset(&self) -> Result<CodeOffset>;
2000
2001    /// Performs a 128-bit addition
2002    fn add128(
2003        &mut self,
2004        dst_lo: WritableReg,
2005        dst_hi: WritableReg,
2006        lhs_lo: Reg,
2007        lhs_hi: Reg,
2008        rhs_lo: Reg,
2009        rhs_hi: Reg,
2010    ) -> Result<()>;
2011
2012    /// Performs a 128-bit subtraction
2013    fn sub128(
2014        &mut self,
2015        dst_lo: WritableReg,
2016        dst_hi: WritableReg,
2017        lhs_lo: Reg,
2018        lhs_hi: Reg,
2019        rhs_lo: Reg,
2020        rhs_hi: Reg,
2021    ) -> Result<()>;
2022
2023    /// Performs a widening multiplication from two 64-bit operands into a
2024    /// 128-bit result.
2025    ///
2026    /// Note that some platforms require special handling of registers in this
2027    /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
2028    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
2029    -> Result<()>;
2030
2031    /// Takes the value in a src operand and replicates it across lanes of
2032    /// `size` in a destination result.
2033    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;
2034
2035    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
2036    /// using lanes as a mask to select which indexes to copy.
2037    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;
2038
2039    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
2040    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;
2041
2042    /// Performs the RMW `op` operation on the passed `addr`.
2043    ///
2044    /// The value *before* the operation was performed is written back to the `operand` register.
2045    fn atomic_rmw(
2046        &mut self,
2047        context: &mut CodeGenContext<Emission>,
2048        addr: Self::Address,
2049        size: OperandSize,
2050        op: RmwOp,
2051        flags: MemFlags,
2052        extend: Option<Extend<Zero>>,
2053    ) -> Result<()>;
2054
2055    /// Extracts the scalar value from `src` in `lane` to `dst`.
2056    fn extract_lane(
2057        &mut self,
2058        src: Reg,
2059        dst: WritableReg,
2060        lane: u8,
2061        kind: ExtractLaneKind,
2062    ) -> Result<()>;
2063
2064    /// Replaces the value in `lane` in `dst` with the value in `src`.
2065    fn replace_lane(
2066        &mut self,
2067        src: RegImm,
2068        dst: WritableReg,
2069        lane: u8,
2070        kind: ReplaceLaneKind,
2071    ) -> Result<()>;
2072
2073    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
2074    /// and `replacement` (at the top of the context's stack).
2075    ///
2076    /// This method takes the `CodeGenContext` as an arguments to accommodate architectures that
2077    /// expect parameters in specific registers. The context stack contains the `replacement`,
2078    /// and `expected` values in that order. The implementer is expected to push the value at
2079    /// `addr` before the update to the context's stack before returning.
2080    fn atomic_cas(
2081        &mut self,
2082        context: &mut CodeGenContext<Emission>,
2083        addr: Self::Address,
2084        size: OperandSize,
2085        flags: MemFlags,
2086        extend: Option<Extend<Zero>>,
2087    ) -> Result<()>;
2088
2089    /// Compares vector registers `lhs` and `rhs` for equality and puts the
2090    /// vector of results in `dst`.
2091    fn v128_eq(
2092        &mut self,
2093        dst: WritableReg,
2094        lhs: Reg,
2095        rhs: Reg,
2096        kind: VectorEqualityKind,
2097    ) -> Result<()>;
2098
2099    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
2100    /// vector of results in `dst`.
2101    fn v128_ne(
2102        &mut self,
2103        dst: WritableReg,
2104        lhs: Reg,
2105        rhs: Reg,
2106        kind: VectorEqualityKind,
2107    ) -> Result<()>;
2108
2109    /// Performs a less than comparison with vector registers `lhs` and `rhs`
2110    /// and puts the vector of results in `dst`.
2111    fn v128_lt(
2112        &mut self,
2113        dst: WritableReg,
2114        lhs: Reg,
2115        rhs: Reg,
2116        kind: VectorCompareKind,
2117    ) -> Result<()>;
2118
2119    /// Performs a less than or equal comparison with vector registers `lhs`
2120    /// and `rhs` and puts the vector of results in `dst`.
2121    fn v128_le(
2122        &mut self,
2123        dst: WritableReg,
2124        lhs: Reg,
2125        rhs: Reg,
2126        kind: VectorCompareKind,
2127    ) -> Result<()>;
2128
2129    /// Performs a greater than comparison with vector registers `lhs` and
2130    /// `rhs` and puts the vector of results in `dst`.
2131    fn v128_gt(
2132        &mut self,
2133        dst: WritableReg,
2134        lhs: Reg,
2135        rhs: Reg,
2136        kind: VectorCompareKind,
2137    ) -> Result<()>;
2138
2139    /// Performs a greater than or equal comparison with vector registers `lhs`
2140    /// and `rhs` and puts the vector of results in `dst`.
2141    fn v128_ge(
2142        &mut self,
2143        dst: WritableReg,
2144        lhs: Reg,
2145        rhs: Reg,
2146        kind: VectorCompareKind,
2147    ) -> Result<()>;
2148
2149    /// Emit a memory fence.
2150    fn fence(&mut self) -> Result<()>;
2151
2152    /// Perform a logical `not` operation on the 128bits vector value in `dst`.
2153    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;
2154
    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
2157    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2158
    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
2161    ///
2162    /// `and_not` is not commutative: dst = !src1 & src2.
2163    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2164
    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
2167    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2168
    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
2171    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2172
    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
    /// from `src1` when mask is 1, and from `src2` when mask is 0.
2175    ///
2176    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
2177    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
2178
2179    /// If any bit in `src` is 1, set `dst` to 1, or 0 otherwise.
2180    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2181
2182    /// Convert vector of integers to vector of floating points.
2183    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;
2184
2185    /// Convert two input vectors into a smaller lane vector by narrowing each
2186    /// lane.
2187    fn v128_narrow(
2188        &mut self,
2189        src1: Reg,
2190        src2: Reg,
2191        dst: WritableReg,
2192        kind: V128NarrowKind,
2193    ) -> Result<()>;
2194
    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
2197    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2198
2199    /// Converts a vector containing four 32-bit floating point lanes to two
2200    /// 64-bit floating point lanes. Only the two lower lanes are converted.
2201    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2202
2203    /// Converts low or high half of the smaller lane vector to a larger lane
2204    /// vector.
2205    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;
2206
    /// Perform a vector add between `lhs` and `rhs`, placing the result in
2208    /// `dst`.
2209    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;
2210
2211    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
2212    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;
2213
2214    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
2215    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
2216    -> Result<()>;
2217
2218    /// Perform an absolute operation on a vector.
2219    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;
2220
2221    /// Vectorized negate of the content of `op`.
2222    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;
2223
2224    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
2225    /// integer at the top of the stack, on the 128-bit vector specified by the second value
2226    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
2227    ///
2228    /// The shift amount is taken modulo `lane_width`.
2229    fn v128_shift(
2230        &mut self,
2231        context: &mut CodeGenContext<Emission>,
2232        lane_width: OperandSize,
2233        kind: ShiftKind,
2234    ) -> Result<()>;
2235
2236    /// Perform a saturating integer q-format rounding multiplication.
2237    fn v128_q15mulr_sat_s(
2238        &mut self,
2239        lhs: Reg,
2240        rhs: Reg,
2241        dst: WritableReg,
2242        size: OperandSize,
2243    ) -> Result<()>;
2244
2245    /// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0
2246    /// otherwise.
2247    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2248
2249    /// Extracts the high bit of each lane in `src` and produces a scalar mask
2250    /// with all bits concatenated in `dst`.
2251    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2252
2253    /// Lanewise truncation operation.
2254    ///
2255    /// If using an integer kind of truncation, then this performs a lane-wise
2256    /// saturating conversion from float to integer using the IEEE
2257    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
2258    /// resulting lane is 0. If the rounded integer value of a lane is outside
2259    /// the range of the destination type, the result is saturated to the
2260    /// nearest representable integer value.
2261    fn v128_trunc(
2262        &mut self,
2263        context: &mut CodeGenContext<Emission>,
2264        kind: V128TruncKind,
2265    ) -> Result<()>;
2266
2267    /// Perform a lane-wise `min` operation between `src1` and `src2`.
2268    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
2269    -> Result<()>;
2270
2271    /// Perform a lane-wise `max` operation between `src1` and `src2`.
2272    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
2273    -> Result<()>;
2274
2275    /// Perform the lane-wise integer extended multiplication producing twice wider result than the
2276    /// inputs. This is equivalent to an extend followed by a multiply.
2277    ///
2278    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
2279    /// e.g, if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
2280    /// 8bits of the 16bits lanes.
2281    fn v128_extmul(
2282        &mut self,
2283        context: &mut CodeGenContext<Emission>,
2284        kind: V128ExtMulKind,
2285    ) -> Result<()>;
2286
2287    /// Perform the lane-wise integer extended pairwise addition producing extended results (twice
2288    /// wider results than the inputs).
2289    fn v128_extadd_pairwise(
2290        &mut self,
2291        src: Reg,
2292        dst: WritableReg,
2293        kind: V128ExtAddKind,
2294    ) -> Result<()>;
2295
2296    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
2297    /// adjacent pairs of the 32-bit results.
2298    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
2299
2300    /// Count the number of bits set in each lane.
2301    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;
2302
2303    /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
2304    /// and put the results in `dst`.
2305    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2306
2307    /// Lane-wise IEEE division on vectors of floats.
2308    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2309
2310    /// Lane-wise IEEE square root of vector of floats.
2311    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2312
2313    /// Lane-wise ceiling of vector of floats.
2314    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2315
2316    /// Lane-wise flooring of vector of floats.
2317    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2318
2319    /// Lane-wise rounding to nearest integer for vector of floats.
2320    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2321
2322    /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
2323    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2324
2325    /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
2326    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2327}