winch_codegen/masm.rs
1use crate::Result;
2use crate::abi::{self, LocalSlot, align_to};
3use crate::codegen::{CodeGenContext, Emission, FuncEnv};
4use crate::isa::{
5 CallingConvention,
6 reg::{Reg, RegClass, WritableReg, writable},
7};
8use cranelift_codegen::{
9 Final, MachBufferFinalized, MachLabel,
10 binemit::CodeOffset,
11 ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
12};
13use std::{fmt::Debug, ops::Range};
14use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};
15
16pub(crate) use cranelift_codegen::ir::TrapCode;
17
/// Kinds of integer division supported by WebAssembly.
#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    /// `memory.atomic.wait32`: wait on a 32-bit value.
    Wait32,
    /// `memory.atomic.wait64`: wait on a 64-bit value.
    Wait64,
}
32
/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    /// Returns true when the remainder is computed over signed operands.
    pub fn is_signed(&self) -> bool {
        match self {
            Self::Signed => true,
            Self::Unsigned => false,
        }
    }
}
47
48/// Kinds of vector min operation supported by WebAssembly.
49pub(crate) enum V128MinKind {
50 /// 4 lanes of 32-bit floats.
51 F32x4,
52 /// 2 lanes of 64-bit floats.
53 F64x2,
54 /// 16 lanes of signed 8-bit integers.
55 I8x16S,
56 /// 16 lanes of unsigned 8-bit integers.
57 I8x16U,
58 /// 8 lanes of signed 16-bit integers.
59 I16x8S,
60 /// 8 lanes of unsigned 16-bit integers.
61 I16x8U,
62 /// 4 lanes of signed 32-bit integers.
63 I32x4S,
64 /// 4 lanes of unsigned 32-bit integers.
65 I32x4U,
66}
67
68impl V128MinKind {
69 /// The size of each lane.
70 pub(crate) fn lane_size(&self) -> OperandSize {
71 match self {
72 Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
73 Self::F64x2 => OperandSize::S64,
74 Self::I8x16S | Self::I8x16U => OperandSize::S8,
75 Self::I16x8S | Self::I16x8U => OperandSize::S16,
76 }
77 }
78}
79
80/// Kinds of vector max operation supported by WebAssembly.
81pub(crate) enum V128MaxKind {
82 /// 4 lanes of 32-bit floats.
83 F32x4,
84 /// 2 lanes of 64-bit floats.
85 F64x2,
86 /// 16 lanes of signed 8-bit integers.
87 I8x16S,
88 /// 16 lanes of unsigned 8-bit integers.
89 I8x16U,
90 /// 8 lanes of signed 16-bit integers.
91 I16x8S,
92 /// 8 lanes of unsigned 16-bit integers.
93 I16x8U,
94 /// 4 lanes of signed 32-bit integers.
95 I32x4S,
96 /// 4 lanes of unsigned 32-bit integers.
97 I32x4U,
98}
99
100impl V128MaxKind {
101 /// The size of each lane.
102 pub(crate) fn lane_size(&self) -> OperandSize {
103 match self {
104 Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
105 Self::F64x2 => OperandSize::S64,
106 Self::I8x16S | Self::I8x16U => OperandSize::S8,
107 Self::I16x8S | Self::I16x8U => OperandSize::S16,
108 }
109 }
110}
111
/// Kinds of widening (full-result) multiplication supported by the compiler.
#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    /// Treat the operands as signed.
    Signed,
    /// Treat the operands as unsigned.
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Exchange (swap the operand with the value in memory).
    Xchg,
    /// Bitwise and.
    And,
    /// Bitwise or.
    Or,
    /// Bitwise exclusive-or.
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}
140
/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns `true` if the trunc kind is [`Checked`].
    ///
    /// [`Checked`]: TruncKind::Checked
    #[must_use]
    pub(crate) fn is_checked(&self) -> bool {
        // Use `matches!` for consistency with `is_unchecked` below; this
        // also avoids depending on the `PartialEq` derive.
        matches!(self, Self::Checked)
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}
167
/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    /// Wrap a raw byte offset into an [`SPOffset`].
    pub fn from_u32(offs: u32) -> Self {
        SPOffset(offs)
    }

    /// Unwrap the raw byte offset.
    pub fn as_u32(&self) -> u32 {
        let SPOffset(raw) = self;
        *raw
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    /// Create a stack slot of `size` bytes located at `offs`.
    pub fn new(offs: SPOffset, size: u32) -> Self {
        StackSlot { offset: offs, size }
    }
}
196
/// Marker trait tying a scratch-register request to a register class.
pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
#[derive(Debug, Clone, Copy)]
pub struct Scratch(Reg);

impl Scratch {
    /// Wrap `r` as a scratch register.
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    /// The underlying register.
    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    /// The underlying register, as a writable register.
    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}
238
/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`MacroAssembler`] implementation for
/// each ISA is responsible for emitting the correct sequence of instructions
/// when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}
301
/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    /// A sign extension.
    Signed(Extend<Signed>),
    /// A zero extension.
    Unsigned(Extend<Zero>),
}

/// Type-level marker for sign extension; uninhabited, never constructed.
#[derive(Copy, Clone)]
pub(crate) enum Signed {}
/// Type-level marker for zero extension; uninhabited, never constructed.
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

/// Trait implemented by the extension marker types ([`Signed`] and [`Zero`]).
pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

/// A width-changing extension, parameterized by its kind marker
/// ([`Signed`] or [`Zero`]).
#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`, that are empty enums, which means that this variant cannot be
    /// constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}
346
347impl<T: ExtendType> Extend<T> {
348 pub fn from_size(&self) -> OperandSize {
349 match self {
350 Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
351 Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
352 Extend::I64Extend32 => OperandSize::S32,
353 Extend::__Kind(_) => unreachable!(),
354 }
355 }
356
357 pub fn to_size(&self) -> OperandSize {
358 match self {
359 Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
360 Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
361 Extend::__Kind(_) => unreachable!(),
362 }
363 }
364
365 pub fn from_bits(&self) -> u8 {
366 self.from_size().num_bits()
367 }
368
369 pub fn to_bits(&self) -> u8 {
370 self.to_size().num_bits()
371 }
372}
373
374impl From<Extend<Signed>> for ExtendKind {
375 fn from(value: Extend<Signed>) -> Self {
376 ExtendKind::Signed(value)
377 }
378}
379
380impl ExtendKind {
381 pub fn signed(&self) -> bool {
382 match self {
383 Self::Signed(_) => true,
384 _ => false,
385 }
386 }
387
388 pub fn from_bits(&self) -> u8 {
389 match self {
390 Self::Signed(s) => s.from_bits(),
391 Self::Unsigned(u) => u.from_bits(),
392 }
393 }
394
395 pub fn to_bits(&self) -> u8 {
396 match self {
397 Self::Signed(s) => s.to_bits(),
398 Self::Unsigned(u) => u.to_bits(),
399 }
400 }
401}
402
/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// An 8 bit scalar source.
    S8,
    /// A 16 bit scalar source.
    S16,
    /// A 32 bit scalar source.
    S32,
    /// A 64 bit scalar source.
    S64,
}
433
434/// Kinds of splat supported by WebAssembly.
435#[derive(Copy, Debug, Clone, Eq, PartialEq)]
436pub(crate) enum SplatKind {
437 /// 8 bit integer.
438 I8x16,
439 /// 16 bit integer.
440 I16x8,
441 /// 32 bit integer.
442 I32x4,
443 /// 64 bit integer.
444 I64x2,
445 /// 32 bit float.
446 F32x4,
447 /// 64 bit float.
448 F64x2,
449}
450
451impl SplatKind {
452 /// The lane size to use for different kinds of splats.
453 pub(crate) fn lane_size(&self) -> OperandSize {
454 match self {
455 SplatKind::I8x16 => OperandSize::S8,
456 SplatKind::I16x8 => OperandSize::S16,
457 SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
458 SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
459 }
460 }
461}
462
/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    /// Convert the extract-lane kind into the scalar sign extension it
    /// implies.
    ///
    /// # Panics
    ///
    /// Panics (`unimplemented!`) for every kind other than `I8x16S` and
    /// `I16x8S`, since no sign extension is involved for those kinds.
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}
505
506/// Kinds of replace lane supported by WebAssembly.
507pub(crate) enum ReplaceLaneKind {
508 /// 16 lanes of 8 bit integers.
509 I8x16,
510 /// 8 lanes of 16 bit integers.
511 I16x8,
512 /// 4 lanes of 32 bit integers.
513 I32x4,
514 /// 2 lanes of 64 bit integers.
515 I64x2,
516 /// 4 lanes of 32 bit floats.
517 F32x4,
518 /// 2 lanes of 64 bit floats.
519 F64x2,
520}
521
522impl ReplaceLaneKind {
523 /// The lane size to use for different kinds of replace lane kinds.
524 pub(crate) fn lane_size(&self) -> OperandSize {
525 match self {
526 ReplaceLaneKind::I8x16 => OperandSize::S8,
527 ReplaceLaneKind::I16x8 => OperandSize::S16,
528 ReplaceLaneKind::I32x4 => OperandSize::S32,
529 ReplaceLaneKind::I64x2 => OperandSize::S64,
530 ReplaceLaneKind::F32x4 => OperandSize::S32,
531 ReplaceLaneKind::F64x2 => OperandSize::S64,
532 }
533 }
534}
535
/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into select lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            // For extending loads, the memory access width is the *source*
            // width of the extension, not the destination width.
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            // All vector extends read a 64-bit half vector from memory.
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    /// Convenience constructor for [`LoadKind::VectorLane`].
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    /// The memory access size implied by a scalar extend (its source width).
    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    /// The memory access size implied by a splat load.
    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    /// Returns true if this is an atomic load.
    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}
595
/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    /// Convenience constructor for [`StoreKind::VectorLane`].
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

/// Identifies a single lane within a vector.
#[derive(Copy, Clone)]
pub struct LaneSelector {
    /// The index of the lane.
    pub lane: u8,
    /// The size of each lane.
    pub size: OperandSize,
}
618
619/// Types of vector integer to float conversions supported by WebAssembly.
620pub(crate) enum V128ConvertKind {
621 /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
622 I32x4S,
623 /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
624 I32x4U,
625 /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
626 /// floats.
627 I32x4LowS,
628 /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
629 /// floats.
630 I32x4LowU,
631}
632
633impl V128ConvertKind {
634 pub(crate) fn src_lane_size(&self) -> OperandSize {
635 match self {
636 V128ConvertKind::I32x4S
637 | V128ConvertKind::I32x4U
638 | V128ConvertKind::I32x4LowS
639 | V128ConvertKind::I32x4LowU => OperandSize::S32,
640 }
641 }
642
643 pub(crate) fn dst_lane_size(&self) -> OperandSize {
644 match self {
645 V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
646 V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
647 }
648 }
649}
650
651/// Kinds of vector narrowing operations supported by WebAssembly.
652pub(crate) enum V128NarrowKind {
653 /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
654 /// signed saturation.
655 I16x8S,
656 /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
657 /// unsigned saturation.
658 I16x8U,
659 /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
660 /// signed saturation.
661 I32x4S,
662 /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
663 /// unsigned saturation.
664 I32x4U,
665}
666
667impl V128NarrowKind {
668 /// Return the size of the destination lanes.
669 pub(crate) fn dst_lane_size(&self) -> OperandSize {
670 match self {
671 Self::I16x8S | Self::I16x8U => OperandSize::S8,
672 Self::I32x4S | Self::I32x4U => OperandSize::S16,
673 }
674 }
675}
676
677/// Kinds of vector extending operations supported by WebAssembly.
678#[derive(Debug, Copy, Clone)]
679pub(crate) enum V128ExtendKind {
680 /// Low half of i8x16 sign extended.
681 LowI8x16S,
682 /// High half of i8x16 sign extended.
683 HighI8x16S,
684 /// Low half of i8x16 zero extended.
685 LowI8x16U,
686 /// High half of i8x16 zero extended.
687 HighI8x16U,
688 /// Low half of i16x8 sign extended.
689 LowI16x8S,
690 /// High half of i16x8 sign extended.
691 HighI16x8S,
692 /// Low half of i16x8 zero extended.
693 LowI16x8U,
694 /// High half of i16x8 zero extended.
695 HighI16x8U,
696 /// Low half of i32x4 sign extended.
697 LowI32x4S,
698 /// High half of i32x4 sign extended.
699 HighI32x4S,
700 /// Low half of i32x4 zero extended.
701 LowI32x4U,
702 /// High half of i32x4 zero extended.
703 HighI32x4U,
704}
705
706impl V128ExtendKind {
707 /// The size of the source's lanes.
708 pub(crate) fn src_lane_size(&self) -> OperandSize {
709 match self {
710 Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
711 OperandSize::S8
712 }
713 Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
714 OperandSize::S16
715 }
716 Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
717 OperandSize::S32
718 }
719 }
720 }
721}
722
723/// Kinds of vector equalities and non-equalities supported by WebAssembly.
724pub(crate) enum VectorEqualityKind {
725 /// 16 lanes of 8 bit integers.
726 I8x16,
727 /// 8 lanes of 16 bit integers.
728 I16x8,
729 /// 4 lanes of 32 bit integers.
730 I32x4,
731 /// 2 lanes of 64 bit integers.
732 I64x2,
733 /// 4 lanes of 32 bit floats.
734 F32x4,
735 /// 2 lanes of 64 bit floats.
736 F64x2,
737}
738
739impl VectorEqualityKind {
740 /// Get the lane size to use.
741 pub(crate) fn lane_size(&self) -> OperandSize {
742 match self {
743 Self::I8x16 => OperandSize::S8,
744 Self::I16x8 => OperandSize::S16,
745 Self::I32x4 | Self::F32x4 => OperandSize::S32,
746 Self::I64x2 | Self::F64x2 => OperandSize::S64,
747 }
748 }
749}
750
751/// Kinds of vector comparisons supported by WebAssembly.
752pub(crate) enum VectorCompareKind {
753 /// 16 lanes of signed 8 bit integers.
754 I8x16S,
755 /// 16 lanes of unsigned 8 bit integers.
756 I8x16U,
757 /// 8 lanes of signed 16 bit integers.
758 I16x8S,
759 /// 8 lanes of unsigned 16 bit integers.
760 I16x8U,
761 /// 4 lanes of signed 32 bit integers.
762 I32x4S,
763 /// 4 lanes of unsigned 32 bit integers.
764 I32x4U,
765 /// 2 lanes of signed 64 bit integers.
766 I64x2S,
767 /// 4 lanes of 32 bit floats.
768 F32x4,
769 /// 2 lanes of 64 bit floats.
770 F64x2,
771}
772
773impl VectorCompareKind {
774 /// Get the lane size to use.
775 pub(crate) fn lane_size(&self) -> OperandSize {
776 match self {
777 Self::I8x16S | Self::I8x16U => OperandSize::S8,
778 Self::I16x8S | Self::I16x8U => OperandSize::S16,
779 Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
780 Self::I64x2S | Self::F64x2 => OperandSize::S64,
781 }
782 }
783}
784
785/// Kinds of vector absolute operations supported by WebAssembly.
786#[derive(Copy, Debug, Clone, Eq, PartialEq)]
787pub(crate) enum V128AbsKind {
788 /// 8 bit integers.
789 I8x16,
790 /// 16 bit integers.
791 I16x8,
792 /// 32 bit integers.
793 I32x4,
794 /// 64 bit integers.
795 I64x2,
796 /// 32 bit floats.
797 F32x4,
798 /// 64 bit floats.
799 F64x2,
800}
801
802impl V128AbsKind {
803 /// The lane size to use.
804 pub(crate) fn lane_size(&self) -> OperandSize {
805 match self {
806 Self::I8x16 => OperandSize::S8,
807 Self::I16x8 => OperandSize::S16,
808 Self::I32x4 | Self::F32x4 => OperandSize::S32,
809 Self::I64x2 | Self::F64x2 => OperandSize::S64,
810 }
811 }
812}
813
814/// Kinds of truncation for vectors supported by WebAssembly.
815pub(crate) enum V128TruncKind {
816 /// Truncates 4 lanes of 32-bit floats to nearest integral value.
817 F32x4,
818 /// Truncates 2 lanes of 64-bit floats to nearest integral value.
819 F64x2,
820 /// Integers from signed F32x4.
821 I32x4FromF32x4S,
822 /// Integers from unsigned F32x4.
823 I32x4FromF32x4U,
824 /// Integers from signed F64x2.
825 I32x4FromF64x2SZero,
826 /// Integers from unsigned F64x2.
827 I32x4FromF64x2UZero,
828}
829
830impl V128TruncKind {
831 /// The size of the source lanes.
832 pub(crate) fn src_lane_size(&self) -> OperandSize {
833 match self {
834 V128TruncKind::F32x4
835 | V128TruncKind::I32x4FromF32x4S
836 | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
837 V128TruncKind::F64x2
838 | V128TruncKind::I32x4FromF64x2SZero
839 | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
840 }
841 }
842
843 /// The size of the destination lanes.
844 pub(crate) fn dst_lane_size(&self) -> OperandSize {
845 if let V128TruncKind::F64x2 = self {
846 OperandSize::S64
847 } else {
848 OperandSize::S32
849 }
850 }
851}
852
/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    /// Maps an integer negation kind to the subtraction kind of the same
    /// lane configuration.
    ///
    /// # Panics
    ///
    /// Panics (`unimplemented!`) for the float kinds `F32x4` and `F64x2`;
    /// only integer negations have a corresponding subtraction kind here.
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}

/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}
926
927/// Kinds of vector negation supported by WebAssembly.
928#[derive(Copy, Clone)]
929pub(crate) enum V128NegKind {
930 /// 4 lanes of 32-bit floats.
931 F32x4,
932 /// 2 lanes of 64-bit floats.
933 F64x2,
934 /// 16 lanes of 8-bit integers.
935 I8x16,
936 /// 8 lanes of 16-bit integers.
937 I16x8,
938 /// 4 lanes of 32-bit integers.
939 I32x4,
940 /// 2 lanes of 64-bit integers.
941 I64x2,
942}
943
944impl V128NegKind {
945 /// The size of the lanes.
946 pub(crate) fn lane_size(&self) -> OperandSize {
947 match self {
948 Self::F32x4 | Self::I32x4 => OperandSize::S32,
949 Self::F64x2 | Self::I64x2 => OperandSize::S64,
950 Self::I8x16 => OperandSize::S8,
951 Self::I16x8 => OperandSize::S16,
952 }
953 }
954}
955
/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    /// Low half of i8x16, sign extended.
    LowI8x16S,
    /// High half of i8x16, sign extended.
    HighI8x16S,
    /// Low half of i8x16, zero extended.
    LowI8x16U,
    /// High half of i8x16, zero extended.
    HighI8x16U,
    /// Low half of i16x8, sign extended.
    LowI16x8S,
    /// High half of i16x8, sign extended.
    HighI16x8S,
    /// Low half of i16x8, zero extended.
    LowI16x8U,
    /// High half of i16x8, zero extended.
    HighI16x8U,
    /// Low half of i32x4, sign extended.
    LowI32x4S,
    /// High half of i32x4, sign extended.
    HighI32x4S,
    /// Low half of i32x4, zero extended.
    LowI32x4U,
    /// High half of i32x4, zero extended.
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    /// The extend to perform on both operands before the multiplication;
    /// this is a one-to-one mapping of variant names.
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    /// The multiplication kind performed after extension: lanes are twice
    /// as wide as the source lanes.
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}
1022
1023/// Operand size, in bits.
1024#[derive(Copy, Debug, Clone, Eq, PartialEq)]
1025pub(crate) enum OperandSize {
1026 /// 8 bits.
1027 S8,
1028 /// 16 bits.
1029 S16,
1030 /// 32 bits.
1031 S32,
1032 /// 64 bits.
1033 S64,
1034 /// 128 bits.
1035 S128,
1036}
1037
1038impl OperandSize {
1039 /// The number of bits in the operand.
1040 pub fn num_bits(&self) -> u8 {
1041 match self {
1042 OperandSize::S8 => 8,
1043 OperandSize::S16 => 16,
1044 OperandSize::S32 => 32,
1045 OperandSize::S64 => 64,
1046 OperandSize::S128 => 128,
1047 }
1048 }
1049
1050 /// The number of bytes in the operand.
1051 pub fn bytes(&self) -> u32 {
1052 match self {
1053 Self::S8 => 1,
1054 Self::S16 => 2,
1055 Self::S32 => 4,
1056 Self::S64 => 8,
1057 Self::S128 => 16,
1058 }
1059 }
1060
1061 /// The binary logarithm of the number of bits in the operand.
1062 pub fn log2(&self) -> u8 {
1063 match self {
1064 OperandSize::S8 => 3,
1065 OperandSize::S16 => 4,
1066 OperandSize::S32 => 5,
1067 OperandSize::S64 => 6,
1068 OperandSize::S128 => 7,
1069 }
1070 }
1071
1072 /// Create an [`OperandSize`] from the given number of bytes.
1073 pub fn from_bytes(bytes: u8) -> Self {
1074 use OperandSize::*;
1075 match bytes {
1076 4 => S32,
1077 8 => S64,
1078 16 => S128,
1079 _ => panic!("Invalid bytes {bytes} for OperandSize"),
1080 }
1081 }
1082
1083 pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
1084 match to {
1085 OperandSize::S32 => match self {
1086 OperandSize::S8 => Some(Extend::I32Extend8),
1087 OperandSize::S16 => Some(Extend::I32Extend16),
1088 _ => None,
1089 },
1090 OperandSize::S64 => match self {
1091 OperandSize::S8 => Some(Extend::I64Extend8),
1092 OperandSize::S16 => Some(Extend::I64Extend16),
1093 OperandSize::S32 => Some(Extend::I64Extend32),
1094 _ => None,
1095 },
1096 _ => None,
1097 }
1098 }
1099
1100 /// The number of bits in the mantissa.
1101 ///
1102 /// Only implemented for floats.
1103 pub fn mantissa_bits(&self) -> u8 {
1104 match self {
1105 Self::S32 => 8,
1106 Self::S64 => 11,
1107 _ => unimplemented!(),
1108 }
1109 }
1110}
1111
/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
///
/// Float immediates carry their raw bit pattern rather than a float value.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate, as raw bits.
    F32(u32),
    /// F64 immediate, as raw bits.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}
1135
1136impl Imm {
1137 /// Create a new I64 immediate.
1138 pub fn i64(val: i64) -> Self {
1139 Self::I64(val as u64)
1140 }
1141
1142 /// Create a new I32 immediate.
1143 pub fn i32(val: i32) -> Self {
1144 Self::I32(val as u32)
1145 }
1146
1147 /// Create a new F32 immediate.
1148 pub fn f32(bits: u32) -> Self {
1149 Self::F32(bits)
1150 }
1151
1152 /// Create a new F64 immediate.
1153 pub fn f64(bits: u64) -> Self {
1154 Self::F64(bits)
1155 }
1156
1157 /// Create a new V128 immediate.
1158 pub fn v128(bits: i128) -> Self {
1159 Self::V128(bits)
1160 }
1161
1162 /// Convert the immediate to i32, if possible.
1163 pub fn to_i32(&self) -> Option<i32> {
1164 match self {
1165 Self::I32(v) => Some(*v as i32),
1166 Self::I64(v) => i32::try_from(*v as i64).ok(),
1167 _ => None,
1168 }
1169 }
1170
1171 /// Unwraps the underlying integer value as u64.
1172 /// # Panics
1173 /// This function panics if the underlying value can't be represented
1174 /// as u64.
1175 pub fn unwrap_as_u64(&self) -> u64 {
1176 match self {
1177 Self::I32(v) => *v as u64,
1178 Self::I64(v) => *v,
1179 Self::F32(v) => *v as u64,
1180 Self::F64(v) => *v,
1181 _ => unreachable!(),
1182 }
1183 }
1184
1185 /// Get the operand size of the immediate.
1186 pub fn size(&self) -> OperandSize {
1187 match self {
1188 Self::I32(_) | Self::F32(_) => OperandSize::S32,
1189 Self::I64(_) | Self::F64(_) => OperandSize::S64,
1190 Self::V128(_) => OperandSize::S128,
1191 }
1192 }
1193
1194 /// Get a little endian representation of the immediate.
1195 ///
1196 /// This method heap allocates and is intended to be used when adding
1197 /// values to the constant pool.
1198 pub fn to_bytes(&self) -> Vec<u8> {
1199 match self {
1200 Imm::I32(n) => n.to_le_bytes().to_vec(),
1201 Imm::I64(n) => n.to_le_bytes().to_vec(),
1202 Imm::F32(n) => n.to_le_bytes().to_vec(),
1203 Imm::F64(n) => n.to_le_bytes().to_vec(),
1204 Imm::V128(n) => n.to_le_bytes().to_vec(),
1205 }
1206 }
1207}
1208
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}
1220
/// The maximum number of context arguments currently used across the compiler.
/// See [ContextArgs] for the combinations in which they are passed.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;
1223
/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}
1245
1246impl ContextArgs {
1247 /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
1248 /// register as both the caller and callee context arguments.
1249 pub fn pinned_callee_and_caller_vmctx() -> Self {
1250 Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
1251 }
1252
1253 /// Construct a [ContextArgs] that declares the usage of the pinned
1254 /// [VMContext] register as the only context argument.
1255 pub fn pinned_vmctx() -> Self {
1256 Self::VMContext([VMContextLoc::Pinned])
1257 }
1258
1259 /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
1260 /// indirectly from the pinned [VMContext] register as the only context
1261 /// argument.
1262 pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
1263 Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
1264 }
1265
1266 /// Construct a [ContextArgs] that declares a dynamic callee context and the
1267 /// pinned [VMContext] register as the context arguments.
1268 pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
1269 Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
1270 }
1271
1272 /// Get the length of the [ContextArgs].
1273 pub fn len(&self) -> usize {
1274 self.as_slice().len()
1275 }
1276
1277 /// Get a slice of the context arguments.
1278 pub fn as_slice(&self) -> &[VMContextLoc] {
1279 match self {
1280 Self::VMContext(a) => a.as_slice(),
1281 Self::CalleeAndCallerVMContext(a) => a.as_slice(),
1282 }
1283 }
1284}
1285
/// The kind of callee targeted by a function call.
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}
1293
1294impl CalleeKind {
1295 /// Creates a callee kind from a register.
1296 pub fn indirect(reg: Reg) -> Self {
1297 Self::Indirect(reg)
1298 }
1299
1300 /// Creates a direct callee kind from a function name.
1301 pub fn direct(name: UserExternalNameRef) -> Self {
1302 Self::Direct(name)
1303 }
1304}
1305
1306impl RegImm {
1307 /// Register constructor.
1308 pub fn reg(r: Reg) -> Self {
1309 RegImm::Reg(r)
1310 }
1311
1312 /// I64 immediate constructor.
1313 pub fn i64(val: i64) -> Self {
1314 RegImm::Imm(Imm::i64(val))
1315 }
1316
1317 /// I32 immediate constructor.
1318 pub fn i32(val: i32) -> Self {
1319 RegImm::Imm(Imm::i32(val))
1320 }
1321
1322 /// F32 immediate, stored using its bits representation.
1323 pub fn f32(bits: u32) -> Self {
1324 RegImm::Imm(Imm::f32(bits))
1325 }
1326
1327 /// F64 immediate, stored using its bits representation.
1328 pub fn f64(bits: u64) -> Self {
1329 RegImm::Imm(Imm::f64(bits))
1330 }
1331
1332 /// V128 immediate.
1333 pub fn v128(bits: i128) -> Self {
1334 RegImm::Imm(Imm::v128(bits))
1335 }
1336}
1337
1338impl From<Reg> for RegImm {
1339 fn from(r: Reg) -> Self {
1340 Self::Reg(r)
1341 }
1342}
1343
/// Rounding modes used by the float rounding operations
/// (see [MacroAssembler::float_round]).
#[derive(Debug)]
pub enum RoundingMode {
    /// Round to the nearest representable value.
    Nearest,
    /// Round toward positive infinity (ceiling).
    Up,
    /// Round toward negative infinity (floor).
    Down,
    /// Round toward zero (truncation).
    Zero,
}
1351
/// Memory flags for trusted loads/stores.
///
/// `MemFlags::trusted()` marks the access as non-trapping and aligned,
/// suitable for compiler-controlled memory such as the machine stack.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
1359
1360/// Generic MacroAssembler interface used by the code generation.
1361///
1362/// The MacroAssembler trait aims to expose an interface, high-level enough,
1363/// so that each ISA can provide its own lowering to machine code. For example,
1364/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
1366/// operator, allowing each implementation to lower such operator entirely.
1367/// This approach attributes more responsibility to the MacroAssembler, but frees
1368/// the caller from concerning about assembling the right sequence of
1369/// instructions at the operator callsite.
1370///
1371/// The interface defaults to a three-argument form for binary operations;
1372/// this allows a natural mapping to instructions for RISC architectures,
1373/// that use three-argument form.
1374/// This approach allows for a more general interface that can be restricted
1375/// where needed, in the case of architectures that use a two-argument form.
1376
1377pub(crate) trait MacroAssembler {
1378 /// The addressing mode.
1379 type Address: Copy + Debug;
1380
1381 /// The pointer representation of the target ISA,
1382 /// used to access information from [`VMOffsets`].
1383 type Ptr: PtrSize;
1384
1385 /// The ABI details of the target.
1386 type ABI: abi::ABI;
1387
    /// Emit the function prologue.
    ///
    /// The default implementation emits the frame setup sequence followed by
    /// a stack check against the given `vmctx` register.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }
1393
1394 /// Generate the frame setup sequence.
1395 fn frame_setup(&mut self) -> Result<()>;
1396
1397 /// Generate the frame restore sequence.
1398 fn frame_restore(&mut self) -> Result<()>;
1399
1400 /// Emit a stack check.
1401 fn check_stack(&mut self, vmctx: Reg) -> Result<()>;
1402
    /// Emit the function epilogue.
    ///
    /// The default implementation only emits the frame restore sequence.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }
1407
1408 /// Reserve stack space.
1409 fn reserve_stack(&mut self, bytes: u32) -> Result<()>;
1410
1411 /// Free stack space.
1412 fn free_stack(&mut self, bytes: u32) -> Result<()>;
1413
1414 /// Reset the stack pointer to the given offset;
1415 ///
1416 /// Used to reset the stack pointer to a given offset
1417 /// when dealing with unreachable code.
1418 fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;
1419
1420 /// Get the address of a local slot.
1421 fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;
1422
1423 /// Constructs an address with an offset that is relative to the
1424 /// current position of the stack pointer (e.g. [sp + (sp_offset -
1425 /// offset)].
1426 fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1427
1428 /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. [sp + offset]).
1430 fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;
1431
1432 /// Alias for [`Self::address_at_reg`] using the VMContext register as
1433 /// a base. The VMContext register is derived from the ABI type that is
1434 /// associated to the MacroAssembler.
1435 fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;
1436
1437 /// Construct an address that is absolute to the current position
1438 /// of the given register.
1439 fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;
1440
1441 /// Emit a function call to either a local or external function.
1442 fn call(
1443 &mut self,
1444 stack_args_size: u32,
1445 f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
1446 ) -> Result<u32>;
1447
1448 /// Acquire a scratch register and execute the given callback.
1449 fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;
1450
    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    ///
    /// Integers and function references use an integer scratch register;
    /// floats and vectors use a floating-point scratch register.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            // Non-function reference types have no scratch class here yet.
            _ => unimplemented!(),
        }
    }
1471
1472 /// Get stack pointer offset.
1473 fn sp_offset(&self) -> Result<SPOffset>;
1474
1475 /// Perform a stack store.
1476 fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;
1477
1478 /// Alias for `MacroAssembler::store` with the operand size corresponding
1479 /// to the pointer size of the target.
1480 fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;
1481
1482 /// Perform a WebAssembly store.
1483 /// A WebAssembly store introduces several additional invariants compared to
1484 /// [Self::store], more precisely, it can implicitly trap, in certain
1485 /// circumstances, even if explicit bounds checks are elided, in that sense,
1486 /// we consider this type of load as untrusted. It can also differ with
1487 /// regards to the endianness depending on the target ISA. For this reason,
1488 /// [Self::wasm_store], should be explicitly used when emitting WebAssembly
1489 /// stores.
1490 fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;
1491
1492 /// Perform a zero-extended stack load.
1493 fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
1494
1495 /// Perform a WebAssembly load.
1496 /// A WebAssembly load introduces several additional invariants compared to
1497 /// [Self::load], more precisely, it can implicitly trap, in certain
1498 /// circumstances, even if explicit bounds checks are elided, in that sense,
1499 /// we consider this type of load as untrusted. It can also differ with
1500 /// regards to the endianness depending on the target ISA. For this reason,
1501 /// [Self::wasm_load], should be explicitly used when emitting WebAssembly
1502 /// loads.
1503 fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;
1504
1505 /// Alias for `MacroAssembler::load` with the operand size corresponding
1506 /// to the pointer size of the target.
1507 fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;
1508
1509 /// Computes the effective address and stores the result in the destination
1510 /// register.
1511 fn compute_addr(
1512 &mut self,
1513 _src: Self::Address,
1514 _dst: WritableReg,
1515 _size: OperandSize,
1516 ) -> Result<()>;
1517
1518 /// Pop a value from the machine stack into the given register.
1519 fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1520
1521 /// Perform a move.
1522 fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;
1523
1524 /// Perform a conditional move.
1525 fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
1526 -> Result<()>;
1527
    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    ///
    /// Both `src` and `dst` are offsets relative to the current stack
    /// pointer. `bytes` must be a multiple of 4; any remainder that is not
    /// a multiple of the target word size must be exactly half a word.
    /// `direction` selects the copy order so that overlapping ranges are
    /// copied correctly.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        // Sanity-check that the requested direction is consistent with the
        // relative position of the source and destination offsets.
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                // Bias the offsets back by `bytes` and pre-increment inside
                // the loop, so the copy proceeds in the opposite order to
                // the `HighToLow` case below.
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        // Copy one word through the scratch register.
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        // Copy one word through the scratch register, then
                        // post-decrement the offsets.
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            // Any leftover is exactly half a word (e.g. 4 bytes on a 64-bit
            // target) and is copied with a single narrower load/store pair.
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                wasmtime_environ::error::Ok(())
            })?;
        }
        Ok(())
    }
1623
1624 /// Perform add operation.
1625 fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1626
1627 /// Perform a checked unsigned integer addition, emitting the provided trap
1628 /// if the addition overflows.
1629 fn checked_uadd(
1630 &mut self,
1631 dst: WritableReg,
1632 lhs: Reg,
1633 rhs: RegImm,
1634 size: OperandSize,
1635 trap: TrapCode,
1636 ) -> Result<()>;
1637
1638 /// Perform subtraction operation.
1639 fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1640
1641 /// Perform multiplication operation.
1642 fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1643
1644 /// Perform a floating point add operation.
1645 fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1646
1647 /// Perform a floating point subtraction operation.
1648 fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1649
1650 /// Perform a floating point multiply operation.
1651 fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1652
1653 /// Perform a floating point divide operation.
1654 fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1655
1656 /// Perform a floating point minimum operation. In x86, this will emit
1657 /// multiple instructions.
1658 fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1659
1660 /// Perform a floating point maximum operation. In x86, this will emit
1661 /// multiple instructions.
1662 fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
1663
1664 /// Perform a floating point copysign operation. In x86, this will emit
1665 /// multiple instructions.
1666 fn float_copysign(
1667 &mut self,
1668 dst: WritableReg,
1669 lhs: Reg,
1670 rhs: Reg,
1671 size: OperandSize,
1672 ) -> Result<()>;
1673
1674 /// Perform a floating point abs operation.
1675 fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1676
1677 /// Perform a floating point negation operation.
1678 fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;
1679
    /// Perform a floating point rounding operation, as selected by `mode`.
1681 fn float_round<
1682 F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
1683 >(
1684 &mut self,
1685 mode: RoundingMode,
1686 env: &mut FuncEnv<Self::Ptr>,
1687 context: &mut CodeGenContext<Emission>,
1688 size: OperandSize,
1689 fallback: F,
1690 ) -> Result<()>;
1691
1692 /// Perform a floating point square root operation.
1693 fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1694
1695 /// Perform logical and operation.
1696 fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1697
1698 /// Perform logical or operation.
1699 fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1700
1701 /// Perform logical exclusive or operation.
1702 fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;
1703
1704 /// Perform a shift operation between a register and an immediate.
1705 fn shift_ir(
1706 &mut self,
1707 dst: WritableReg,
1708 imm: Imm,
1709 lhs: Reg,
1710 kind: ShiftKind,
1711 size: OperandSize,
1712 ) -> Result<()>;
1713
1714 /// Perform a shift operation between two registers.
1715 /// This case is special in that some architectures have specific expectations
1716 /// regarding the location of the instruction arguments. To free the
1717 /// caller from having to deal with the architecture specific constraints
1718 /// we give this function access to the code generation context, allowing
1719 /// each implementation to decide the lowering path.
1720 fn shift(
1721 &mut self,
1722 context: &mut CodeGenContext<Emission>,
1723 kind: ShiftKind,
1724 size: OperandSize,
1725 ) -> Result<()>;
1726
1727 /// Perform division operation.
1728 /// Division is special in that some architectures have specific
1729 /// expectations regarding the location of the instruction
1730 /// arguments and regarding the location of the quotient /
1731 /// remainder. To free the caller from having to deal with the
1732 /// architecture specific constraints we give this function access
1733 /// to the code generation context, allowing each implementation
1734 /// to decide the lowering path. For cases in which division is a
1735 /// unconstrained binary operation, the caller can decide to use
1736 /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
1737 /// functions.
1738 fn div(
1739 &mut self,
1740 context: &mut CodeGenContext<Emission>,
1741 kind: DivKind,
1742 size: OperandSize,
1743 ) -> Result<()>;
1744
1745 /// Calculate remainder.
1746 fn rem(
1747 &mut self,
1748 context: &mut CodeGenContext<Emission>,
1749 kind: RemKind,
1750 size: OperandSize,
1751 ) -> Result<()>;
1752
1753 /// Compares `src1` against `src2` for the side effect of setting processor
1754 /// flags.
1755 ///
1756 /// Note that `src1` is the left-hand-side of the comparison and `src2` is
1757 /// the right-hand-side, so if testing `a < b` then `src1 == a` and
1758 /// `src2 == b`
1759 fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;
1760
1761 /// Compare src and dst and put the result in dst.
1762 /// This function will potentially emit a series of instructions.
1763 ///
1764 /// The initial value in `dst` is the left-hand-side of the comparison and
1765 /// the initial value in `src` is the right-hand-side of the comparison.
1766 /// That means for `a < b` then `dst == a` and `src == b`.
1767 fn cmp_with_set(
1768 &mut self,
1769 dst: WritableReg,
1770 src: RegImm,
1771 kind: IntCmpKind,
1772 size: OperandSize,
1773 ) -> Result<()>;
1774
1775 /// Compare floats in src1 and src2 and put the result in dst.
1776 /// In x86, this will emit multiple instructions.
1777 fn float_cmp_with_set(
1778 &mut self,
1779 dst: WritableReg,
1780 src1: Reg,
1781 src2: Reg,
1782 kind: FloatCmpKind,
1783 size: OperandSize,
1784 ) -> Result<()>;
1785
1786 /// Count the number of leading zeroes in src and put the result in dst.
1787 /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
1788 /// false.
1789 fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1790
    /// Count the number of trailing zeroes in src and put the result in dst.
1792 /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1793 /// false.
1794 fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1795
1796 /// Push the register to the stack, returning the stack slot metadata.
1797 // NB
1798 // The stack alignment should not be assumed after any call to `push`,
1799 // unless explicitly aligned otherwise. Typically, stack alignment is
1800 // maintained at call sites and during the execution of
1801 // epilogues.
1802 fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1803
1804 /// Finalize the assembly and return the result.
1805 fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1806
1807 /// Zero a particular register.
1808 fn zero(&mut self, reg: WritableReg) -> Result<()>;
1809
1810 /// Count the number of 1 bits in src and put the result in dst. In x64,
1811 /// this will emit multiple instructions if the `has_popcnt` flag is false.
1812 fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1813
1814 /// Converts an i64 to an i32 by discarding the high 32 bits.
1815 fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1816
1817 /// Extends an integer of a given size to a larger size.
1818 fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1819
1820 /// Emits one or more instructions to perform a signed truncation of a
1821 /// float into an integer.
1822 fn signed_truncate(
1823 &mut self,
1824 dst: WritableReg,
1825 src: Reg,
1826 src_size: OperandSize,
1827 dst_size: OperandSize,
1828 kind: TruncKind,
1829 ) -> Result<()>;
1830
1831 /// Emits one or more instructions to perform an unsigned truncation of a
1832 /// float into an integer.
1833 fn unsigned_truncate(
1834 &mut self,
1835 context: &mut CodeGenContext<Emission>,
1836 src_size: OperandSize,
1837 dst_size: OperandSize,
1838 kind: TruncKind,
1839 ) -> Result<()>;
1840
1841 /// Emits one or more instructions to perform a signed convert of an
1842 /// integer into a float.
1843 fn signed_convert(
1844 &mut self,
1845 dst: WritableReg,
1846 src: Reg,
1847 src_size: OperandSize,
1848 dst_size: OperandSize,
1849 ) -> Result<()>;
1850
1851 /// Emits one or more instructions to perform an unsigned convert of an
1852 /// integer into a float.
1853 fn unsigned_convert(
1854 &mut self,
1855 dst: WritableReg,
1856 src: Reg,
1857 tmp_gpr: Reg,
1858 src_size: OperandSize,
1859 dst_size: OperandSize,
1860 ) -> Result<()>;
1861
1862 /// Reinterpret a float as an integer.
1863 fn reinterpret_float_as_int(
1864 &mut self,
1865 dst: WritableReg,
1866 src: Reg,
1867 size: OperandSize,
1868 ) -> Result<()>;
1869
1870 /// Reinterpret an integer as a float.
1871 fn reinterpret_int_as_float(
1872 &mut self,
1873 dst: WritableReg,
1874 src: Reg,
1875 size: OperandSize,
1876 ) -> Result<()>;
1877
1878 /// Demote an f64 to an f32.
1879 fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1880
1881 /// Promote an f32 to an f64.
1882 fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1883
    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    ///
    /// `mem` is a range of frame offsets addressed through [LocalSlot];
    /// its start must be at least 4-byte aligned.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        // Nothing to emit for an empty range.
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            // Zero the unaligned 4-byte prefix with a 32-bit immediate
            // store, then continue from the word-aligned boundary below.
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range, is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            // A single slot is zeroed with one immediate store; no scratch
            // register is needed.
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large amount of slots
            // this will be inefficient.
            //
            // Zero a scratch register once and reuse it for every slot,
            // avoiding re-materializing the zero immediate per store.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                wasmtime_environ::error::Ok(())
            })?;
        }

        Ok(())
    }
1935
1936 /// Generate a label.
1937 fn get_label(&mut self) -> Result<MachLabel>;
1938
1939 /// Bind the given label at the current code offset.
1940 fn bind(&mut self, label: MachLabel) -> Result<()>;
1941
1942 /// Conditional branch.
1943 ///
1944 /// Performs a comparison between the two operands,
1945 /// and immediately after emits a jump to the given
1946 /// label destination if the condition is met.
1947 fn branch(
1948 &mut self,
1949 kind: IntCmpKind,
1950 lhs: Reg,
1951 rhs: RegImm,
1952 taken: MachLabel,
1953 size: OperandSize,
1954 ) -> Result<()>;
1955
    /// Emits an unconditional jump to the given label.
1957 fn jmp(&mut self, target: MachLabel) -> Result<()>;
1958
1959 /// Emits a jump table sequence. The default label is specified as
1960 /// the last element of the targets slice.
1961 fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;
1962
1963 /// Emit an unreachable code trap.
1964 fn unreachable(&mut self) -> Result<()>;
1965
1966 /// Emit an unconditional trap.
1967 fn trap(&mut self, code: TrapCode) -> Result<()>;
1968
1969 /// Traps if the condition code is met.
1970 fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;
1971
1972 /// Trap if the source register is zero.
1973 fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;
1974
1975 /// Ensures that the stack pointer is correctly positioned before an unconditional
1976 /// jump according to the requirements of the destination target.
1977 fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
1978 let bytes = self
1979 .sp_offset()?
1980 .as_u32()
1981 .checked_sub(target.as_u32())
1982 .unwrap_or(0);
1983
1984 if bytes > 0 {
1985 self.free_stack(bytes)?;
1986 }
1987
1988 Ok(())
1989 }
1990
1991 /// Mark the start of a source location returning the machine code offset
1992 /// and the relative source code location.
1993 fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;
1994
1995 /// Mark the end of a source location.
1996 fn end_source_loc(&mut self) -> Result<()>;
1997
1998 /// The current offset, in bytes from the beginning of the function.
1999 fn current_code_offset(&self) -> Result<CodeOffset>;
2000
2001 /// Performs a 128-bit addition
2002 fn add128(
2003 &mut self,
2004 dst_lo: WritableReg,
2005 dst_hi: WritableReg,
2006 lhs_lo: Reg,
2007 lhs_hi: Reg,
2008 rhs_lo: Reg,
2009 rhs_hi: Reg,
2010 ) -> Result<()>;
2011
2012 /// Performs a 128-bit subtraction
2013 fn sub128(
2014 &mut self,
2015 dst_lo: WritableReg,
2016 dst_hi: WritableReg,
2017 lhs_lo: Reg,
2018 lhs_hi: Reg,
2019 rhs_lo: Reg,
2020 rhs_hi: Reg,
2021 ) -> Result<()>;
2022
2023 /// Performs a widening multiplication from two 64-bit operands into a
2024 /// 128-bit result.
2025 ///
2026 /// Note that some platforms require special handling of registers in this
2027 /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
2028 fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
2029 -> Result<()>;
2030
2031 /// Takes the value in a src operand and replicates it across lanes of
2032 /// `size` in a destination result.
2033 fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;
2034
2035 /// Performs a shuffle between two 128-bit vectors into a 128-bit result
2036 /// using lanes as a mask to select which indexes to copy.
2037 fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;
2038
2039 /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
2040 fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;
2041
2042 /// Performs the RMW `op` operation on the passed `addr`.
2043 ///
2044 /// The value *before* the operation was performed is written back to the `operand` register.
2045 fn atomic_rmw(
2046 &mut self,
2047 context: &mut CodeGenContext<Emission>,
2048 addr: Self::Address,
2049 size: OperandSize,
2050 op: RmwOp,
2051 flags: MemFlags,
2052 extend: Option<Extend<Zero>>,
2053 ) -> Result<()>;
2054
2055 /// Extracts the scalar value from `src` in `lane` to `dst`.
2056 fn extract_lane(
2057 &mut self,
2058 src: Reg,
2059 dst: WritableReg,
2060 lane: u8,
2061 kind: ExtractLaneKind,
2062 ) -> Result<()>;
2063
2064 /// Replaces the value in `lane` in `dst` with the value in `src`.
2065 fn replace_lane(
2066 &mut self,
2067 src: RegImm,
2068 dst: WritableReg,
2069 lane: u8,
2070 kind: ReplaceLaneKind,
2071 ) -> Result<()>;
2072
    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
    /// and `replacement` (at the top of the context's stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
    /// expect parameters in specific registers. The context stack contains the `replacement`
    /// and `expected` values, in that order. The implementer is expected to push the
    /// pre-update value at `addr` onto the context's stack before returning.
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;
2088
    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    ///
    /// `kind` determines the lane width used for the comparison.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;
2098
    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    ///
    /// `kind` determines the lane width used for the comparison.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;
2108
    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    ///
    /// `kind` determines the lane width and interpretation used for the
    /// comparison.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;
2118
    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    ///
    /// `kind` determines the lane width and interpretation used for the
    /// comparison.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;
2128
    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    ///
    /// `kind` determines the lane width and interpretation used for the
    /// comparison.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;
2138
    /// Performs a greater than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    ///
    /// `kind` determines the lane width and interpretation used for the
    /// comparison.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;
2148
    /// Emit a memory fence, ordering memory operations around it.
    fn fence(&mut self) -> Result<()>;
2151
    /// Perform a logical `not` operation, in place, on the 128-bit vector value
    /// in `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;
2154
    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2158
    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    ///
    /// `and_not` is not commutative: dst = !src1 & src2.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2164
    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2168
    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;
2172
    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
    /// from `src1` when mask is 1, and from `src2` when mask is 0.
    ///
    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;
2178
    /// If any bit in `src` is 1, set `dst` to 1, otherwise set `dst` to 0.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2181
    /// Convert a vector of integers to a vector of floating points, with the
    /// source and destination lane types selected by `kind`.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;
2184
    /// Convert two input vectors into a smaller lane vector by narrowing each
    /// lane, with the narrowing behavior selected by `kind`.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;
2194
    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2198
    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
2202
    /// Converts the low or high half of the smaller lane vector to a larger
    /// lane vector; which half, and the signedness of the extension, are
    /// selected by `kind`.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;
2206
    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;
2210
    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;
2213
    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
    ///
    /// NOTE(review): takes the full `CodeGenContext`, presumably because some
    /// platforms require specific registers (as with `mul_wide`) — confirm.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
        -> Result<()>;
2217
    /// Perform a lane-wise absolute value operation on `src`, placing the
    /// result in `dst`.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;
2220
    /// Vectorized negate, in place, of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;
2223
    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
    /// integer at the top of the stack, on the 128-bit vector specified by the second value
    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;
2235
    /// Perform a saturating integer q-format rounding multiplication on lanes
    /// of `size`, placing the result in `dst`.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;
2244
    /// Sets `dst` to 1 if all lanes (of width `size`) in `src` are non-zero,
    /// sets `dst` to 0 otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2248
    /// Extracts the high bit of each lane (of width `size`) in `src` and
    /// produces a scalar mask with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2252
    /// Lane-wise truncation operation.
    ///
    /// If using an integer kind of truncation, then this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is outside
    /// the range of the destination type, the result is saturated to the
    /// nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;
2266
    /// Perform a lane-wise `min` operation between `src1` and `src2`, with the
    /// lane type selected by `kind`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
        -> Result<()>;
2270
    /// Perform a lane-wise `max` operation between `src1` and `src2`, with the
    /// lane type selected by `kind`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
        -> Result<()>;
2274
    /// Perform the lane-wise integer extended multiplication producing a twice wider result than
    /// the inputs. This is equivalent to an extend followed by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
    /// e.g, if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
    /// 8bits of the 16bits lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;
2286
    /// Perform the lane-wise integer extended pairwise addition producing extended results (twice
    /// wider results than the inputs), with the extension selected by `kind`.
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;
2295
    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;
2299
    /// Count the number of bits set in each lane.
    ///
    /// NOTE(review): operand appears to come from the context's value stack —
    /// confirm against implementations.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;
2302
    /// Lane-wise rounding average of vectors of integers (lanes of width
    /// `size`) in `lhs` and `rhs`, placing the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2306
    /// Lane-wise IEEE division on vectors of floats with lanes of width `size`.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2309
    /// Lane-wise IEEE square root of a vector of floats with lanes of width `size`.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2312
    /// Lane-wise ceiling of a vector of floats with lanes of width `size`.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2315
    /// Lane-wise flooring of a vector of floats with lanes of width `size`.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2318
    /// Lane-wise rounding to nearest integer for a vector of floats with lanes
    /// of width `size`.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2321
    /// Lane-wise pseudo-minimum, defined as `rhs < lhs ? rhs : lhs` (note the
    /// operand order, which matters for NaN and signed-zero inputs).
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2324
    /// Lane-wise pseudo-maximum, defined as `lhs < rhs ? rhs : lhs` (note the
    /// operand order, which matters for NaN and signed-zero inputs).
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
2327}