cranelift_codegen/isa/x64/inst/mod.rs

//! This module defines x86_64-specific machine instruction types.

pub use emit_state::EmitState;

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::{ExternalName, LibCall, TrapCode, Type, types};
use crate::isa::x64::abi::X64ABIMachineSpec;
use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
use crate::isa::x64::settings as x64_settings;
use crate::isa::{CallConv, FunctionAlignment};
use crate::{CodegenError, CodegenResult, settings};
use crate::{machinst::*, trace};
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::slice;
use cranelift_assembler_x64 as asm;
use smallvec::{SmallVec, smallvec};
use std::fmt::{self, Write};
use std::string::{String, ToString};

pub mod args;
mod emit;
mod emit_state;
#[cfg(test)]
mod emit_tests;
pub mod external;
pub mod regs;
mod stack_switch;
pub mod unwind;

use args::*;

//=============================================================================
// Instructions (top level): definition

// `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here.
pub use super::lower::isle::generated_code::AtomicRmwSeqOp;
pub use super::lower::isle::generated_code::MInst as Inst;

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going.
    pub dest: T,

    /// The size of the argument area for this return-call, potentially smaller than that of the
    /// caller, but never larger.
    pub new_stack_arg_size: u32,

    /// The in-register arguments and their constraints.
    pub uses: CallArgList,

    /// A temporary for use when moving the return address.
    pub tmp: WritableGpr,
}

#[test]
#[cfg(target_pointer_width = "64")]
fn inst_size_test() {
    // This test helps catch any unintentional growth in the size of the
    // `Inst` enum.
    assert_eq!(48, std::mem::size_of::<Inst>());
}

pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    xs == ((xs << 32) >> 32)
}
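
// A small illustrative test (an added sketch): spells out which values pass
// the sign-extension check above. Bits 32..64 must be copies of bit 31.
#[test]
fn low32_sign_extension_examples() {
    assert!(low32_will_sign_extend_to_64(0x7fff_ffff)); // i32::MAX
    assert!(low32_will_sign_extend_to_64(u64::MAX)); // -1 as an i64
    assert!(!low32_will_sign_extend_to_64(0x8000_0000)); // bit 31 set, bits 32..64 clear
    assert!(!low32_will_sign_extend_to_64(0x1_0000_0000)); // bit 32 set
}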

impl Inst {
    /// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
    /// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
    /// below); more than one `InstructionSet` in the list indicates that the instruction is
    /// present in *any* of the included ISA feature sets.
    fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
        match self {
            // These instructions are part of SSE2, which is a basic requirement in Cranelift, and
            // don't have to be checked.
            Inst::AtomicRmwSeq { .. }
            | Inst::CallKnown { .. }
            | Inst::CallUnknown { .. }
            | Inst::ReturnCallKnown { .. }
            | Inst::ReturnCallUnknown { .. }
            | Inst::CheckedSRemSeq { .. }
            | Inst::CheckedSRemSeq8 { .. }
            | Inst::Cmove { .. }
            | Inst::CmpRmiR { .. }
            | Inst::CvtFloatToSintSeq { .. }
            | Inst::CvtFloatToUintSeq { .. }
            | Inst::CvtUint64ToFloatSeq { .. }
            | Inst::Fence { .. }
            | Inst::Hlt
            | Inst::Imm { .. }
            | Inst::JmpCond { .. }
            | Inst::JmpCondOr { .. }
            | Inst::WinchJmpIf { .. }
            | Inst::JmpKnown { .. }
            | Inst::JmpTableSeq { .. }
            | Inst::JmpUnknown { .. }
            | Inst::LoadEffectiveAddress { .. }
            | Inst::LoadExtName { .. }
            | Inst::LockCmpxchg { .. }
            | Inst::LockXadd { .. }
            | Inst::Xchg { .. }
            | Inst::MovImmM { .. }
            | Inst::MovRM { .. }
            | Inst::MovRR { .. }
            | Inst::MovFromPReg { .. }
            | Inst::MovToPReg { .. }
            | Inst::Nop { .. }
            | Inst::Pop64 { .. }
            | Inst::Push64 { .. }
            | Inst::StackProbeLoop { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Setcc { .. }
            | Inst::StackSwitchBasic { .. }
            | Inst::TrapIf { .. }
            | Inst::TrapIfAnd { .. }
            | Inst::TrapIfOr { .. }
            | Inst::Ud2 { .. }
            | Inst::XmmCmove { .. }
            | Inst::XmmCmpRmR { .. }
            | Inst::XmmMinMaxSeq { .. }
            | Inst::XmmUninitializedValue { .. }
            | Inst::GprUninitializedValue { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. }
            | Inst::CoffTlsGetAddr { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. } => smallvec![],

            Inst::LockCmpxchg16b { .. }
            | Inst::Atomic128RmwSeq { .. }
            | Inst::Atomic128XchgSeq { .. } => smallvec![InstructionSet::CMPXCHG16b],

            // These use dynamic SSE opcodes.
            Inst::XmmRmR { op, .. }
            | Inst::XmmRmRUnaligned { op, .. }
            | Inst::XmmRmRBlend { op, .. }
            | Inst::XmmRmRImm { op, .. }
            | Inst::XmmUnaryRmRImm { op, .. }
            | Inst::XmmUnaryRmRUnaligned { op, .. }
            | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],

            Inst::XmmUnaryRmREvex { op, .. }
            | Inst::XmmRmREvex { op, .. }
            | Inst::XmmRmREvex3 { op, .. }
            | Inst::XmmUnaryRmRImmEvex { op, .. } => op.available_from(),

            Inst::XmmRmiRVex { op, .. }
            | Inst::XmmRmRVex3 { op, .. }
            | Inst::XmmRmRImmVex { op, .. }
            | Inst::XmmRmRBlendVex { op, .. }
            | Inst::XmmVexPinsr { op, .. }
            | Inst::XmmUnaryRmRVex { op, .. }
            | Inst::XmmUnaryRmRImmVex { op, .. }
            | Inst::XmmMovRMVex { op, .. }
            | Inst::XmmMovRMImmVex { op, .. }
            | Inst::XmmToGprImmVex { op, .. }
            | Inst::XmmToGprVex { op, .. }
            | Inst::GprToXmmVex { op, .. }
            | Inst::CvtIntToFloatVex { op, .. }
            | Inst::XmmCmpRmRVex { op, .. } => op.available_from(),

            Inst::External { inst } => {
                use cranelift_assembler_x64::Feature::*;
                let mut features = smallvec![];
                for f in inst.features() {
                    match f {
                        _64b | compat => {}
                        sse => features.push(InstructionSet::SSE),
                        sse2 => features.push(InstructionSet::SSE2),
                        ssse3 => features.push(InstructionSet::SSSE3),
                        sse41 => features.push(InstructionSet::SSE41),
                        bmi1 => features.push(InstructionSet::BMI1),
                        bmi2 => features.push(InstructionSet::BMI2),
                        lzcnt => features.push(InstructionSet::Lzcnt),
                        popcnt => features.push(InstructionSet::Popcnt),
                        avx => features.push(InstructionSet::AVX),
                    }
                }
                features
            }
        }
    }
}
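
// An illustrative check (an added sketch): per the doc comment on
// `available_in_any_isa`, instructions in the SSE2 baseline report an empty
// feature list.
#[test]
fn baseline_insts_need_no_extra_isa_features() {
    assert!(Inst::Hlt.available_in_any_isa().is_empty());
}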

// Handy constructors for Insts.

impl Inst {
    pub(crate) fn nop(len: u8) -> Self {
        debug_assert!(len <= 15);
        Self::Nop { len }
    }

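    /// Add `simm32` to `dst`, selecting the sign-extended 8-bit immediate
    /// encoding when the value fits in an `i8` and falling back to the
    /// sign-extended 32-bit immediate encoding otherwise.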
    pub(crate) fn addq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::addq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::addq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn subq_mi(dst: Writable<Reg>, simm32: i32) -> Self {
        let inst = if let Ok(simm8) = i8::try_from(simm32) {
            asm::inst::subq_mi_sxb::new(dst, simm8).into()
        } else {
            asm::inst::subq_mi_sxl::new(dst, simm32).into()
        };
        Inst::External { inst }
    }

    pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        // Try to generate a 32-bit immediate when the upper 32 bits are zeroed (which matches
        // the semantics of movl).
        let dst_size = match dst_size {
            OperandSize::Size64 if simm64 > u32::MAX as u64 => OperandSize::Size64,
            _ => OperandSize::Size32,
        };
        Inst::Imm {
            dst_size,
            simm64,
            dst: WritableGpr::from_writable_reg(dst).unwrap(),
        }
    }

    pub(crate) fn mov_r_r(size: OperandSize, src: Reg, dst: Writable<Reg>) -> Inst {
        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert!(src.class() == RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = Gpr::unwrap_new(src);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        Inst::MovRR { size, src, dst }
    }

    /// Convenient helper for unary float operations.
    #[cfg(test)]
    pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Float);
        debug_assert!(dst.to_reg().class() == RegClass::Float);
        Inst::XmmUnaryRmR {
            op,
            src: XmmMemAligned::unwrap_new(src),
            dst: WritableXmm::from_writable_reg(dst).unwrap(),
        }
    }

    #[cfg(test)]
    pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
        src.assert_regclass_is(RegClass::Float);
        debug_assert!(dst.to_reg().class() == RegClass::Float);
        Inst::XmmRmR {
            op,
            src1: Xmm::unwrap_new(dst.to_reg()),
            src2: XmmMemAligned::unwrap_new(src),
            dst: WritableXmm::from_writable_reg(dst).unwrap(),
        }
    }

    #[cfg(test)]
    pub(crate) fn xmm_rmr_vex3(op: AvxOpcode, src3: RegMem, src2: Reg, dst: Writable<Reg>) -> Self {
        src3.assert_regclass_is(RegClass::Float);
        debug_assert!(src2.class() == RegClass::Float);
        debug_assert!(dst.to_reg().class() == RegClass::Float);
        Inst::XmmRmRVex3 {
            op,
            src3: XmmMem::unwrap_new(src3),
            src2: Xmm::unwrap_new(src2),
            src1: Xmm::unwrap_new(dst.to_reg()),
            dst: WritableXmm::from_writable_reg(dst).unwrap(),
        }
    }

    pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src1: Reg, src2: RegMem) -> Inst {
        src2.assert_regclass_is(RegClass::Float);
        debug_assert!(src1.class() == RegClass::Float);
        let src2 = XmmMemAligned::unwrap_new(src2);
        let src1 = Xmm::unwrap_new(src1);
        Inst::XmmCmpRmR { op, src1, src2 }
    }

    #[allow(dead_code)]
    pub(crate) fn xmm_min_max_seq(
        size: OperandSize,
        is_min: bool,
        lhs: Reg,
        rhs: Reg,
        dst: Writable<Reg>,
    ) -> Inst {
        debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
        debug_assert_eq!(lhs.class(), RegClass::Float);
        debug_assert_eq!(rhs.class(), RegClass::Float);
        debug_assert_eq!(dst.to_reg().class(), RegClass::Float);
        Inst::XmmMinMaxSeq {
            size,
            is_min,
            lhs: Xmm::unwrap_new(lhs),
            rhs: Xmm::unwrap_new(rhs),
            dst: WritableXmm::from_writable_reg(dst).unwrap(),
        }
    }

    pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        Inst::External { inst }
    }

    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let src = match src {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        };
        let inst = match ext_mode {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        Inst::External { inst }
    }

    pub(crate) fn mov_r_m(size: OperandSize, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
        debug_assert!(src.class() == RegClass::Int);
        Inst::MovRM {
            size,
            src: Gpr::unwrap_new(src),
            dst: dst.into(),
        }
    }

    pub(crate) fn lea(addr: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        Inst::LoadEffectiveAddress {
            addr: addr.into(),
            dst: WritableGpr::from_writable_reg(dst).unwrap(),
            size: OperandSize::Size64,
        }
    }

    /// Performs a comparison computing `src1 - src2` (setting flags only) for operands of size
    /// `size`, as stated by the machine instruction semantics. Be careful with the order of
    /// parameters!
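    ///
    /// For example (illustrative): `cmp_rmi_r(size, rax, RegMemImm::reg(rbx))` computes
    /// `%rax - %rbx` and pretty-prints in AT&T operand order as `cmpq %rbx, %rax`.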
    pub(crate) fn cmp_rmi_r(size: OperandSize, src1: Reg, src2: RegMemImm) -> Inst {
        src2.assert_regclass_is(RegClass::Int);
        debug_assert_eq!(src1.class(), RegClass::Int);
        Inst::CmpRmiR {
            size,
            src1: Gpr::unwrap_new(src1),
            src2: GprMemImm::unwrap_new(src2),
            opcode: CmpOpcode::Cmp,
        }
    }

    pub(crate) fn trap(trap_code: TrapCode) -> Inst {
        Inst::Ud2 { trap_code }
    }

    pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
        Inst::TrapIf { cc, trap_code }
    }

    pub(crate) fn cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
        debug_assert!(size.is_one_of(&[
            OperandSize::Size16,
            OperandSize::Size32,
            OperandSize::Size64
        ]));
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        Inst::Cmove {
            size,
            cc,
            consequent: GprMem::unwrap_new(src),
            alternative: Gpr::unwrap_new(dst.to_reg()),
            dst: WritableGpr::from_writable_reg(dst).unwrap(),
        }
    }

    pub(crate) fn push64(src: RegMemImm) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        let src = GprMemImm::unwrap_new(src);
        Inst::Push64 { src }
    }

    pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
        debug_assert!(dst.to_reg().class() == RegClass::Int);
        let dst = WritableGpr::from_writable_reg(dst).unwrap();
        Inst::Pop64 { dst }
    }

    pub(crate) fn call_known(info: Box<CallInfo<ExternalName>>) -> Inst {
        Inst::CallKnown { info }
    }

    pub(crate) fn call_unknown(info: Box<CallInfo<RegMem>>) -> Inst {
        info.dest.assert_regclass_is(RegClass::Int);
        Inst::CallUnknown { info }
    }

    pub(crate) fn ret(stack_bytes_to_pop: u32) -> Inst {
        Inst::Ret { stack_bytes_to_pop }
    }

    pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
        Inst::JmpKnown { dst }
    }

    pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
        target.assert_regclass_is(RegClass::Int);
        Inst::JmpUnknown { target }
    }

    /// Choose which instruction to use for loading a register value from memory. For loads smaller
    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
    pub(crate) fn load(
        ty: Type,
        from_addr: impl Into<SyntheticAmode>,
        to_reg: Writable<Reg>,
        ext_kind: ExtKind,
    ) -> Inst {
        let rc = to_reg.to_reg().class();
        match rc {
            RegClass::Int => {
                let ext_mode = match ty.bytes() {
                    1 => Some(ExtMode::BQ),
                    2 => Some(ExtMode::WQ),
                    4 => Some(ExtMode::LQ),
                    8 => None,
                    _ => unreachable!("the type should never use a scalar load: {}", ty),
                };
                if let Some(ext_mode) = ext_mode {
                    // Values smaller than 64 bits must be extended in some way.
                    match ext_kind {
                        ExtKind::SignExtend => {
                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::ZeroExtend => {
                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
                        }
                        ExtKind::None => {
                            panic!("expected an extension kind for extension mode: {ext_mode:?}")
                        }
                    }
                } else {
                    // 64-bit values can be moved directly.
                    let from_addr = asm::GprMem::from(from_addr.into());
                    Inst::External {
                        inst: asm::inst::movq_rm::new(to_reg, from_addr).into(),
                    }
                }
            }
            RegClass::Float => {
                let to_reg = to_reg.map(|r| Xmm::new(r).unwrap());
                let from_addr = from_addr.into();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("loading an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_a_m::new(to_reg, from_addr).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_a_m::new(to_reg, from_addr).into()
                    }
                    types::F32X4 => asm::inst::movups_a::new(to_reg, from_addr).into(),
                    types::F64X2 => asm::inst::movupd_a::new(to_reg, from_addr).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_a::new(to_reg, from_addr).into()
                    }
                    _ => unimplemented!("unable to load type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    /// Choose which instruction to use for storing a register value to memory.
    pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
        let rc = from_reg.class();
        match rc {
            RegClass::Int => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr),
            RegClass::Float => {
                let to_addr = to_addr.into();
                let from_reg = Xmm::new(from_reg).unwrap();
                let inst = match ty {
                    types::F16 | types::I8X2 => {
                        panic!("storing an f16 or i8x2 requires multiple instructions")
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 32 => {
                        asm::inst::movss_c_m::new(to_addr, from_reg).into()
                    }
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 64 => {
                        asm::inst::movsd_c_m::new(to_addr, from_reg).into()
                    }
                    types::F32X4 => asm::inst::movups_b::new(to_addr, from_reg).into(),
                    types::F64X2 => asm::inst::movupd_b::new(to_addr, from_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() == 128 => {
                        asm::inst::movdqu_b::new(to_addr, from_reg).into()
                    }
                    _ => unimplemented!("unable to store type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }
}
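
// The following are illustrative sketches (not part of the original source);
// they assume the `regs::rax()`/`regs::rbp()` helpers and the
// `Amode::imm_reg` constructor used by this backend's emit tests.
#[test]
fn imm_narrows_to_32_bits_when_upper_bits_are_zero() {
    let dst = Writable::from_reg(regs::rax());
    // Upper 32 bits zero: representable by a 32-bit `movl`.
    assert!(matches!(
        Inst::imm(OperandSize::Size64, 0xffff_ffff, dst),
        Inst::Imm {
            dst_size: OperandSize::Size32,
            ..
        }
    ));
    // Bit 32 set: requires the full 64-bit `movabsq`.
    assert!(matches!(
        Inst::imm(OperandSize::Size64, 0x1_0000_0000, dst),
        Inst::Imm {
            dst_size: OperandSize::Size64,
            ..
        }
    ));
}

#[test]
#[should_panic]
fn load_requires_an_extension_kind_for_narrow_types() {
    // A 32-bit integer load must say how to extend into the 64-bit
    // destination register; `ExtKind::None` panics here by design.
    let dst = Writable::from_reg(regs::rax());
    let _ = Inst::load(types::I32, Amode::imm_reg(0, regs::rbp()), dst, ExtKind::None);
}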

//=============================================================================
// Instructions: printing

impl PrettyPrint for Inst {
    fn pretty_print(&self, _size: u8) -> String {
        fn ljustify(s: String) -> String {
            let w = 7;
            if s.len() >= w {
                s
            } else {
                let need = usize::min(w, w - s.len());
                s + &format!("{nil: <width$}", nil = "", width = need)
            }
        }

        fn ljustify2(s1: String, s2: String) -> String {
            ljustify(s1 + &s2)
        }

        fn suffix_lq(size: OperandSize) -> String {
            match size {
                OperandSize::Size32 => "l",
                OperandSize::Size64 => "q",
                _ => unreachable!(),
            }
            .to_string()
        }

        #[allow(dead_code)]
        fn suffix_lqb(size: OperandSize) -> String {
            match size {
                OperandSize::Size32 => "l",
                OperandSize::Size64 => "q",
                _ => unreachable!(),
            }
            .to_string()
        }

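        // Maps an operand size to its AT&T-style mnemonic suffix, e.g.
        // `Size16` becomes "w" so that `mov` prints as `movw`.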
        fn suffix_bwlq(size: OperandSize) -> String {
            match size {
                OperandSize::Size8 => "b".to_string(),
                OperandSize::Size16 => "w".to_string(),
                OperandSize::Size32 => "l".to_string(),
                OperandSize::Size64 => "q".to_string(),
            }
        }

        match self {
            Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),

            Inst::CheckedSRemSeq {
                size,
                divisor,
                dividend_lo,
                dividend_hi,
                dst_quotient,
                dst_remainder,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes());
                let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes());
                let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes());
                let dst_quotient =
                    pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes());
                let dst_remainder =
                    pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes());
                format!(
                    "checked_srem_seq {dividend_lo}, {dividend_hi}, \
                        {divisor}, {dst_quotient}, {dst_remainder}",
                )
            }

            Inst::CheckedSRemSeq8 {
                divisor,
                dividend,
                dst,
            } => {
                let divisor = pretty_print_reg(divisor.to_reg(), 1);
                let dividend = pretty_print_reg(dividend.to_reg(), 1);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                format!("checked_srem_seq {dividend}, {divisor}, {dst}")
            }

            Inst::XmmUnaryRmR { op, src, dst, .. } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
                let src = src.pretty_print(op.src_size());
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmUnaryRmRUnaligned { op, src, dst, .. } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
                let src = src.pretty_print(op.src_size());
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmUnaryRmRImm {
                op, src, dst, imm, ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size());
                let src = src.pretty_print(op.src_size());
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmUnaryRmRImmVex {
                op, src, dst, imm, ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmUnaryRmRImmEvex {
                op, src, dst, imm, ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::XmmMovRMVex { op, src, dst, .. } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = dst.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmMovRMImmVex {
                op, src, dst, imm, ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = dst.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::XmmRmR {
                op,
                src1,
                src2,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {dst}")
            }

            Inst::XmmRmRUnaligned {
                op,
                src1,
                src2,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {dst}")
            }

            Inst::XmmRmRBlend {
                op,
                src1,
                src2,
                mask,
                dst,
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let mask = mask.to_reg();
                let mask = if mask.is_virtual() {
                    format!(" <{}>", show_ireg_sized(mask, 8))
                } else {
                    debug_assert_eq!(mask, regs::xmm0());
                    String::new()
                };
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {dst}{mask}")
            }

            Inst::XmmRmiRVex {
                op,
                src1,
                src2,
                dst,
                ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {dst}")
            }

            Inst::XmmRmRImmVex {
                op,
                src1,
                src2,
                dst,
                imm,
                ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src1}, {src2}, {dst}")
            }

            Inst::XmmVexPinsr {
                op,
                src1,
                src2,
                dst,
                imm,
                ..
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src1}, {src2}, {dst}")
            }

            Inst::XmmRmRVex3 {
                op,
                src1,
                src2,
                src3,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src2 = pretty_print_reg(src2.to_reg(), 8);
                let src3 = src3.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {src3}, {dst}")
            }

            Inst::XmmRmRBlendVex {
                op,
                src1,
                src2,
                mask,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let mask = pretty_print_reg(mask.to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {mask}, {dst}")
            }

            Inst::XmmRmREvex {
                op,
                src1,
                src2,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} {src2}, {src1}, {dst}")
            }

            Inst::XmmRmREvex3 {
                op,
                src1,
                src2,
                src3,
                dst,
                ..
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = pretty_print_reg(src2.to_reg(), 8);
                let src3 = src3.pretty_print(8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} {src3}, {src2}, {src1}, {dst}")
            }

            Inst::XmmMinMaxSeq {
                lhs,
                rhs,
                dst,
                is_min,
                size,
            } => {
                let rhs = pretty_print_reg(rhs.to_reg(), 8);
                let lhs = pretty_print_reg(lhs.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify2(
                    if *is_min {
                        "xmm min seq ".to_string()
                    } else {
                        "xmm max seq ".to_string()
                    },
                    format!("f{}", size.to_bits()),
                );
                format!("{op} {lhs}, {rhs}, {dst}")
            }

            Inst::XmmRmRImm {
                op,
                src1,
                src2,
                dst,
                imm,
                size,
                ..
            } => {
                let src1 = pretty_print_reg(*src1, 8);
                let dst = pretty_print_reg(dst.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(format!(
                    "{}{}",
                    op.to_string(),
                    if *size == OperandSize::Size64 {
                        ".w"
                    } else {
                        ""
                    }
                ));
                format!("{op} ${imm}, {src1}, {src2}, {dst}")
            }

            Inst::XmmUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::GprUninitializedValue { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("uninit".into());
                format!("{op} {dst}")
            }

            Inst::XmmToGprVex {
                op,
                src,
                dst,
                dst_size,
            } => {
                let dst_size = dst_size.to_bytes();
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size);
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmToGprImmVex { op, src, dst, imm } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify(op.to_string());
                format!("{op} ${imm}, {src}, {dst}")
            }

            Inst::GprToXmmVex {
                op,
                src,
                src_size,
                dst,
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let src = src.pretty_print(src_size.to_bytes());
                let op = ljustify(op.to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::XmmCmpRmR { op, src1, src2 } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                let op = ljustify(op.to_string());
                format!("{op} {src2}, {src1}")
            }

            Inst::CvtIntToFloatVex {
                op,
                src1,
                src2,
                dst,
                src2_size,
            } => {
                let dst = pretty_print_reg(*dst.to_reg(), 8);
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(src2_size.to_bytes());
                let op = ljustify(op.to_string());
                format!("{op} {src1}, {src2}, {dst}")
            }

            Inst::XmmCmpRmRVex { op, src1, src2 } => {
                let src1 = pretty_print_reg(src1.to_reg(), 8);
                let src2 = src2.pretty_print(8);
                format!("{} {src2}, {src1}", ljustify(op.to_string()))
            }

            Inst::CvtUint64ToFloatSeq {
                src,
                dst,
                dst_size,
                tmp_gpr1,
                tmp_gpr2,
                ..
            } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8);
                let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "u64_to_{}_seq",
                    if *dst_size == OperandSize::Size64 {
                        "f64"
                    } else {
                        "f32"
                    }
                ));
                format!("{op} {src}, {dst}, {tmp_gpr1}, {tmp_gpr2}")
            }

            Inst::CvtFloatToSintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_xmm,
                tmp_gpr,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_sint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}")
            }

            Inst::CvtFloatToUintSeq {
                src,
                dst,
                src_size,
                dst_size,
                tmp_gpr,
                tmp_xmm,
                tmp_xmm2,
                is_saturating,
            } => {
                let src = pretty_print_reg(src.to_reg(), src_size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8);
                let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8);
                let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8);
                let op = ljustify(format!(
                    "cvt_float{}_to_uint{}{}_seq",
                    src_size.to_bits(),
                    dst_size.to_bits(),
                    if *is_saturating { "_sat" } else { "" },
                ));
                format!("{op} {src}, {dst}, {tmp_gpr}, {tmp_xmm}, {tmp_xmm2}")
            }

            Inst::Imm {
                dst_size,
                simm64,
                dst,
            } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes());
                if *dst_size == OperandSize::Size64 {
                    let op = ljustify("movabsq".to_string());
                    let imm = *simm64 as i64;
                    format!("{op} ${imm}, {dst}")
                } else {
                    let op = ljustify("movl".to_string());
                    let imm = (*simm64 as u32) as i32;
                    format!("{op} ${imm}, {dst}")
                }
            }

            Inst::MovImmM { size, simm32, dst } => {
                let dst = dst.pretty_print(size.to_bytes());
                let suffix = suffix_bwlq(*size);
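                // Reinterpret the stored immediate at the operand width so the
                // printed value carries the correct sign.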
                let imm = match *size {
                    OperandSize::Size8 => ((*simm32 as u8) as i8).to_string(),
                    OperandSize::Size16 => ((*simm32 as u16) as i16).to_string(),
                    OperandSize::Size32 => simm32.to_string(),
                    OperandSize::Size64 => (*simm32 as i64).to_string(),
                };
                let op = ljustify2("mov".to_string(), suffix);
                format!("{op} ${imm}, {dst}")
            }

            Inst::MovRR { size, src, dst } => {
                let src = pretty_print_reg(src.to_reg(), size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
                let op = ljustify2("mov".to_string(), suffix_lq(*size));
                format!("{op} {src}, {dst}")
            }

            Inst::MovFromPReg { src, dst } => {
                let src: Reg = (*src).into();
                let src = regs::show_ireg_sized(src, 8);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::MovToPReg { src, dst } => {
                let src = pretty_print_reg(src.to_reg(), 8);
                let dst: Reg = (*dst).into();
                let dst = regs::show_ireg_sized(dst, 8);
                let op = ljustify("movq".to_string());
                format!("{op} {src}, {dst}")
            }

            Inst::LoadEffectiveAddress { addr, dst, size } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
                let addr = addr.pretty_print(8);
                let op = ljustify("lea".to_string());
                format!("{op} {addr}, {dst}")
            }

            Inst::MovRM { size, src, dst, .. } => {
                let src = pretty_print_reg(src.to_reg(), size.to_bytes());
                let dst = dst.pretty_print(size.to_bytes());
                let op = ljustify2("mov".to_string(), suffix_bwlq(*size));
                format!("{op} {src}, {dst}")
            }

            Inst::CmpRmiR {
                size,
                src1,
                src2,
                opcode,
            } => {
                let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes());
                let src2 = src2.pretty_print(size.to_bytes());
                let op = match opcode {
                    CmpOpcode::Cmp => "cmp",
                    CmpOpcode::Test => "test",
                };
                let op = ljustify2(op.to_string(), suffix_bwlq(*size));
                format!("{op} {src2}, {src1}")
            }

            Inst::Setcc { cc, dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 1);
                let op = ljustify2("set".to_string(), cc.to_string());
                format!("{op} {dst}")
            }

            Inst::Cmove {
                size,
                cc,
                consequent,
                alternative,
                dst,
            } => {
                let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes());
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes());
                let consequent = consequent.pretty_print(size.to_bytes());
                let op = ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size)));
                format!("{op} {consequent}, {alternative}, {dst}")
            }

            Inst::XmmCmove {
                ty,
                cc,
                consequent,
                alternative,
                dst,
                ..
            } => {
                let size = u8::try_from(ty.bytes()).unwrap();
                let alternative = pretty_print_reg(alternative.to_reg(), size);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size);
                let consequent = pretty_print_reg(consequent.to_reg(), size);
                let suffix = match *ty {
                    types::F64 => "sd",
                    types::F32 => "ss",
                    types::F16 => "ss",
                    types::F32X4 => "aps",
                    types::F64X2 => "apd",
                    _ => "dqa",
                };
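                // The printed sequence moves the alternative into place first
                // and conditionally jumps over the consequent move, so the
                // condition is inverted for the jump.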
                let cc = cc.invert();
                format!(
                    "mov{suffix} {alternative}, {dst}; \
                    j{cc} $next; \
                    mov{suffix} {consequent}, {dst}; \
                    $next:"
                )
            }

            Inst::Push64 { src } => {
                let src = src.pretty_print(8);
                let op = ljustify("pushq".to_string());
                format!("{op} {src}")
            }

            Inst::StackProbeLoop {
                tmp,
                frame_size,
                guard_size,
            } => {
                let tmp = pretty_print_reg(tmp.to_reg(), 8);
                let op = ljustify("stack_probe_loop".to_string());
                format!("{op} {tmp}, frame_size={frame_size}, guard_size={guard_size}")
            }

            Inst::Pop64 { dst } => {
                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
                let op = ljustify("popq".to_string());
                format!("{op} {dst}")
            }

            Inst::CallKnown { info } => {
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} {:?}{try_call}", info.dest)
            }

            Inst::CallUnknown { info } => {
                let dest = info.dest.pretty_print(8);
                let op = ljustify("call".to_string());
                let try_call = info
                    .try_call_info
                    .as_ref()
                    .map(|tci| pretty_print_try_call(tci))
                    .unwrap_or_default();
                format!("{op} *{dest}{try_call}")
            }

            Inst::ReturnCallKnown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::ReturnCallUnknown { info } => {
                let ReturnCallInfo {
                    uses,
                    new_stack_arg_size,
                    tmp,
                    dest,
                } = &**info;
                let callee = pretty_print_reg(*dest, 8);
                let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8);
                let mut s =
                    format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}");
                for ret in uses {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Args { args } => {
                let mut s = "args".to_string();
                for arg in args {
                    let preg = regs::show_reg(arg.preg);
                    let def = pretty_print_reg(arg.vreg.to_reg(), 8);
                    write!(&mut s, " {def}={preg}").unwrap();
                }
                s
            }

            Inst::Rets { rets } => {
                let mut s = "rets".to_string();
                for ret in rets {
                    let preg = regs::show_reg(ret.preg);
                    let vreg = pretty_print_reg(ret.vreg, 8);
                    write!(&mut s, " {vreg}={preg}").unwrap();
                }
                s
            }

            Inst::Ret { stack_bytes_to_pop } => {
                let mut s = "ret".to_string();
                if *stack_bytes_to_pop != 0 {
                    write!(&mut s, " {stack_bytes_to_pop}").unwrap();
                }
                s
            }

            Inst::StackSwitchBasic {
                store_context_ptr,
                load_context_ptr,
                in_payload0,
                out_payload0,
            } => {
                let store_context_ptr = pretty_print_reg(**store_context_ptr, 8);
                let load_context_ptr = pretty_print_reg(**load_context_ptr, 8);
                let in_payload0 = pretty_print_reg(**in_payload0, 8);
                let out_payload0 = pretty_print_reg(*out_payload0.to_reg(), 8);
                format!(
                    "{out_payload0} = stack_switch_basic {store_context_ptr}, {load_context_ptr}, {in_payload0}"
                )
            }

            Inst::JmpKnown { dst } => {
                let op = ljustify("jmp".to_string());
                let dst = dst.to_string();
                format!("{op} {dst}")
            }

            Inst::WinchJmpIf { cc, taken } => {
                let taken = taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}")
            }

            Inst::JmpCondOr {
                cc1,
                cc2,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify(format!("j{cc1},{cc2}"));
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpCond {
                cc,
                taken,
                not_taken,
            } => {
                let taken = taken.to_string();
                let not_taken = not_taken.to_string();
                let op = ljustify2("j".to_string(), cc.to_string());
                format!("{op} {taken}; j {not_taken}")
            }

            Inst::JmpTableSeq {
                idx, tmp1, tmp2, ..
            } => {
                let idx = pretty_print_reg(*idx, 8);
                let tmp1 = pretty_print_reg(tmp1.to_reg(), 8);
                let tmp2 = pretty_print_reg(tmp2.to_reg(), 8);
                let op = ljustify("br_table".into());
                format!("{op} {idx}, {tmp1}, {tmp2}")
            }

            Inst::JmpUnknown { target } => {
                let target = target.pretty_print(8);
                let op = ljustify("jmp".to_string());
                format!("{op} *{target}")
            }

            Inst::TrapIf { cc, trap_code, .. } => {
                format!("j{cc} #trap={trap_code}")
            }

            Inst::TrapIfAnd {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc1 = cc1.invert();
                let cc2 = cc2.invert();
                format!("trap_if_and {cc1}, {cc2}, {trap_code}")
            }

            Inst::TrapIfOr {
                cc1,
                cc2,
                trap_code,
                ..
            } => {
                let cc2 = cc2.invert();
                format!("trap_if_or {cc1}, {cc2}, {trap_code}")
            }

            Inst::LoadExtName {
                dst, name, offset, ..
            } => {
                let dst = pretty_print_reg(dst.to_reg(), 8);
                let name = name.display(None);
                let op = ljustify("load_ext_name".into());
                format!("{op} {name}+{offset}, {dst}")
            }

            Inst::LockCmpxchg {
                ty,
                replacement,
                expected,
                mem,
                dst_old,
                ..
            } => {
                let size = ty.bytes() as u8;
                let replacement = pretty_print_reg(*replacement, size);
                let expected = pretty_print_reg(*expected, size);
                let dst_old = pretty_print_reg(dst_old.to_reg(), size);
                let mem = mem.pretty_print(size);
                let suffix = suffix_bwlq(OperandSize::from_bytes(size as u32));
                format!(
                    "lock cmpxchg{suffix} {replacement}, {mem}, expected={expected}, dst_old={dst_old}"
                )
            }

            Inst::LockCmpxchg16b {
                replacement_low,
                replacement_high,
                expected_low,
                expected_high,
                mem,
                dst_old_low,
                dst_old_high,
                ..
            } => {
                let replacement_low = pretty_print_reg(*replacement_low, 8);
                let replacement_high = pretty_print_reg(*replacement_high, 8);
                let expected_low = pretty_print_reg(*expected_low, 8);
                let expected_high = pretty_print_reg(*expected_high, 8);
                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "lock cmpxchg16b {mem}, replacement={replacement_high}:{replacement_low}, expected={expected_high}:{expected_low}, dst_old={dst_old_high}:{dst_old_low}"
                )
            }

            Inst::LockXadd {
                size,
                operand,
                mem,
                dst_old,
            } => {
                let operand = pretty_print_reg(*operand, size.to_bytes());
                let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes());
                let mem = mem.pretty_print(size.to_bytes());
                let suffix = suffix_bwlq(*size);
                format!("lock xadd{suffix} {operand}, {mem}, dst_old={dst_old}")
            }

            Inst::Xchg {
                size,
                operand,
                mem,
                dst_old,
            } => {
                let operand = pretty_print_reg(*operand, size.to_bytes());
                let dst_old = pretty_print_reg(dst_old.to_reg(), size.to_bytes());
                let mem = mem.pretty_print(size.to_bytes());
                let suffix = suffix_bwlq(*size);
                format!("xchg{suffix} {operand}, {mem}, dst_old={dst_old}")
            }

            Inst::AtomicRmwSeq { ty, op, .. } => {
                let ty = ty.bits();
                format!(
                    "atomically {{ {ty}_bits_at_[%r9] {op:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}"
                )
            }

            Inst::Atomic128RmwSeq {
                op,
                mem,
                operand_low,
                operand_high,
                temp_low,
                temp_high,
                dst_old_low,
                dst_old_high,
            } => {
                let operand_low = pretty_print_reg(*operand_low, 8);
                let operand_high = pretty_print_reg(*operand_high, 8);
                let temp_low = pretty_print_reg(temp_low.to_reg(), 8);
                let temp_high = pretty_print_reg(temp_high.to_reg(), 8);
                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
                let mem = mem.pretty_print(16);
                format!(
                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {temp_high}:{temp_low} = {dst_old_high}:{dst_old_low} {op:?} {operand_high}:{operand_low}; {mem} = {temp_high}:{temp_low} }}"
                )
            }

            Inst::Atomic128XchgSeq {
                mem,
1467                operand_low,
1468                operand_high,
1469                dst_old_low,
1470                dst_old_high,
1471            } => {
1472                let operand_low = pretty_print_reg(*operand_low, 8);
1473                let operand_high = pretty_print_reg(*operand_high, 8);
1474                let dst_old_low = pretty_print_reg(dst_old_low.to_reg(), 8);
1475                let dst_old_high = pretty_print_reg(dst_old_high.to_reg(), 8);
1476                let mem = mem.pretty_print(16);
1477                format!(
1478                    "atomically {{ {dst_old_high}:{dst_old_low} = {mem}; {mem} = {operand_high}:{operand_low} }}"
1479                )
1480            }
1481
1482            Inst::Fence { kind } => match kind {
1483                FenceKind::MFence => "mfence".to_string(),
1484                FenceKind::LFence => "lfence".to_string(),
1485                FenceKind::SFence => "sfence".to_string(),
1486            },
1487
1488            Inst::Hlt => "hlt".into(),
1489
1490            Inst::Ud2 { trap_code } => format!("ud2 {trap_code}"),
1491
1492            Inst::ElfTlsGetAddr { symbol, dst } => {
1493                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1494                format!("{dst} = elf_tls_get_addr {symbol:?}")
1495            }
1496
1497            Inst::MachOTlsGetAddr { symbol, dst } => {
1498                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1499                format!("{dst} = macho_tls_get_addr {symbol:?}")
1500            }
1501
1502            Inst::CoffTlsGetAddr { symbol, dst, tmp } => {
1503                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
1504                let tmp = tmp.to_reg().to_reg();
1505
1506                let mut s = format!("{dst} = coff_tls_get_addr {symbol:?}");
1507                if tmp.is_virtual() {
1508                    let tmp = show_ireg_sized(tmp, 8);
1509                    write!(&mut s, ", {tmp}").unwrap();
1510                };
1511
1512                s
1513            }
1514
1515            Inst::Unwind { inst } => format!("unwind {inst:?}"),
1516
1517            Inst::DummyUse { reg } => {
1518                let reg = pretty_print_reg(*reg, 8);
1519                format!("dummy_use {reg}")
1520            }
1521
1522            Inst::External { inst } => {
1523                format!("{inst}")
1524            }
1525        }
1526    }
1527}
1528
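/// Pretty-print the exception-handling suffix of a try-call: the normal-return
/// continuation followed by its catch destinations.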
fn pretty_print_try_call(info: &TryCallInfo) -> String {
    let dests = info
        .exception_dests
        .iter()
        .map(|(tag, label)| format!("{tag:?}: {label:?}"))
        .collect::<Vec<_>>()
        .join(", ");
    format!("; jmp {:?}; catch [{dests}]", info.continuation)
}

impl fmt::Debug for Inst {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.pretty_print_inst(&mut Default::default()))
    }
}

fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    // Note: because we need to statically know the indices of each
    // reg in the operands list in order to fetch its allocation
    // later, we put the variable-operand-count bits (the RegMem,
    // RegMemImm, etc args) last. regalloc2 doesn't care what order
    // the operands come in; they can be freely reordered.

    // N.B.: we MUST keep the below in careful sync with (i) emission,
    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
    // method above.
    match inst {
        Inst::CheckedSRemSeq {
            divisor,
            dividend_lo,
            dividend_hi,
            dst_quotient,
            dst_remainder,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend_lo, regs::rax());
            collector.reg_fixed_use(dividend_hi, regs::rdx());
            collector.reg_fixed_def(dst_quotient, regs::rax());
            collector.reg_fixed_def(dst_remainder, regs::rdx());
        }
        Inst::CheckedSRemSeq8 {
            divisor,
            dividend,
            dst,
            ..
        } => {
            collector.reg_use(divisor);
            collector.reg_fixed_use(dividend, regs::rax());
            collector.reg_fixed_def(dst, regs::rax());
        }
        Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmRImm { src, dst, .. } => {
            collector.reg_def(dst);
            src.get_operands(collector);
        }
        Inst::XmmUnaryRmREvex { src, dst, .. }
        | Inst::XmmUnaryRmRImmEvex { src, dst, .. }
        | Inst::XmmUnaryRmRUnaligned { src, dst, .. }
        | Inst::XmmUnaryRmRVex { src, dst, .. }
        | Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
            collector.reg_def(dst);
            src.get_operands(collector);
        }
        Inst::XmmRmR {
            src1, src2, dst, ..
        } => {
            collector.reg_use(src1);
            collector.reg_reuse_def(dst, 0);
            src2.get_operands(collector);
        }
        Inst::XmmRmRUnaligned {
            src1, src2, dst, ..
        } => {
            collector.reg_use(src1);
            collector.reg_reuse_def(dst, 0);
            src2.get_operands(collector);
        }
        Inst::XmmRmRBlend {
            src1,
            src2,
            mask,
            dst,
            op,
        } => {
            assert!(matches!(
                op,
                SseOpcode::Blendvpd | SseOpcode::Blendvps | SseOpcode::Pblendvb
            ));
            collector.reg_use(src1);
            collector.reg_fixed_use(mask, regs::xmm0());
            collector.reg_reuse_def(dst, 0);
            src2.get_operands(collector);
        }
        Inst::XmmRmiRVex {
            src1, src2, dst, ..
        } => {
            collector.reg_def(dst);
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::XmmRmRImmVex {
            src1, src2, dst, ..
        } => {
            collector.reg_def(dst);
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::XmmVexPinsr {
            src1, src2, dst, ..
        } => {
            collector.reg_def(dst);
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::XmmRmRVex3 {
            src1,
            src2,
            src3,
            dst,
            ..
        } => {
            collector.reg_use(src1);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(src2);
            src3.get_operands(collector);
        }
        Inst::XmmRmRBlendVex {
            src1,
            src2,
            mask,
            dst,
            ..
        } => {
            collector.reg_def(dst);
            collector.reg_use(src1);
            src2.get_operands(collector);
            collector.reg_use(mask);
        }
        Inst::XmmRmREvex {
            op,
            src1,
            src2,
            dst,
            ..
        } => {
            assert_ne!(*op, Avx512Opcode::Vpermi2b);
            collector.reg_use(src1);
            src2.get_operands(collector);
            collector.reg_def(dst);
        }
        Inst::XmmRmREvex3 {
            op,
            src1,
            src2,
            src3,
            dst,
            ..
        } => {
            assert_eq!(*op, Avx512Opcode::Vpermi2b);
            collector.reg_use(src1);
            collector.reg_use(src2);
            src3.get_operands(collector);
            collector.reg_reuse_def(dst, 0); // Reuse `src1`.
        }
        Inst::XmmRmRImm {
            src1, src2, dst, ..
        } => {
            collector.reg_use(src1);
            collector.reg_reuse_def(dst, 0);
            src2.get_operands(collector);
        }
        Inst::XmmUninitializedValue { dst } => collector.reg_def(dst),
        Inst::GprUninitializedValue { dst } => collector.reg_def(dst),
        Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
            collector.reg_use(rhs);
            collector.reg_use(lhs);
            collector.reg_reuse_def(dst, 0); // Reuse RHS.
        }
        Inst::XmmMovRMVex { src, dst, .. } | Inst::XmmMovRMImmVex { src, dst, .. } => {
            collector.reg_use(src);
            dst.get_operands(collector);
        }
        Inst::XmmCmpRmR { src1, src2, .. } => {
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::XmmCmpRmRVex { src1, src2, .. } => {
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::Imm { dst, .. } => {
            collector.reg_def(dst);
        }
        Inst::MovRR { src, dst, .. } => {
            collector.reg_use(src);
            collector.reg_def(dst);
        }
        Inst::MovFromPReg { dst, src } => {
            debug_assert!(dst.to_reg().to_reg().is_virtual());
            collector.reg_fixed_nonallocatable(*src);
            collector.reg_def(dst);
        }
        Inst::MovToPReg { dst, src } => {
            debug_assert!(src.to_reg().is_virtual());
            collector.reg_use(src);
            collector.reg_fixed_nonallocatable(*dst);
        }
        Inst::XmmToGprVex { src, dst, .. } | Inst::XmmToGprImmVex { src, dst, .. } => {
            collector.reg_use(src);
            collector.reg_def(dst);
        }
        Inst::GprToXmmVex { src, dst, .. } => {
            collector.reg_def(dst);
            src.get_operands(collector);
        }
        Inst::CvtIntToFloatVex {
            src1, src2, dst, ..
        } => {
            collector.reg_def(dst);
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::CvtUint64ToFloatSeq {
            src,
            dst,
            tmp_gpr1,
            tmp_gpr2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr1);
            collector.reg_early_def(tmp_gpr2);
        }
        Inst::CvtFloatToSintSeq {
            src,
            dst,
            tmp_xmm,
            tmp_gpr,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
        }
        Inst::CvtFloatToUintSeq {
            src,
            dst,
            tmp_gpr,
            tmp_xmm,
            tmp_xmm2,
            ..
        } => {
            collector.reg_use(src);
            collector.reg_early_def(dst);
            collector.reg_early_def(tmp_gpr);
            collector.reg_early_def(tmp_xmm);
            collector.reg_early_def(tmp_xmm2);
        }

        Inst::MovImmM { dst, .. } => {
            dst.get_operands(collector);
        }
        Inst::LoadEffectiveAddress { addr: src, dst, .. } => {
            collector.reg_def(dst);
            src.get_operands(collector);
        }
        Inst::MovRM { src, dst, .. } => {
            collector.reg_use(src);
            dst.get_operands(collector);
        }
        Inst::CmpRmiR { src1, src2, .. } => {
            collector.reg_use(src1);
            src2.get_operands(collector);
        }
        Inst::Setcc { dst, .. } => {
            collector.reg_def(dst);
        }
        Inst::Cmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            consequent.get_operands(collector);
        }
        Inst::XmmCmove {
            consequent,
            alternative,
            dst,
            ..
        } => {
            collector.reg_use(alternative);
            collector.reg_reuse_def(dst, 0);
            collector.reg_use(consequent);
        }
        Inst::Push64 { src } => {
            src.get_operands(collector);
        }
        Inst::Pop64 { dst } => {
            collector.reg_def(dst);
        }
        Inst::StackProbeLoop { tmp, .. } => {
            collector.reg_early_def(tmp);
        }

        Inst::CallKnown { info } => {
            // Probestack is special and is only inserted after
            // regalloc, so we do not need to represent its ABI to the
            // register allocator. Assert that we don't alter that
            // arrangement.
            let CallInfo {
                uses,
                defs,
                clobbers,
                dest,
                ..
            } = &mut **info;
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
        }

        Inst::CallUnknown { info } => {
            let CallInfo {
                uses,
                defs,
                clobbers,
                callee_conv,
                dest,
                ..
            } = &mut **info;
            match dest {
                RegMem::Reg { reg } if *callee_conv == CallConv::Winch => {
                    // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
                    // This shouldn't be a fixed register constraint. r10 is caller-saved, so this
                    // should be safe to use.
                    collector.reg_fixed_use(reg, regs::r10());
                }
                _ => dest.get_operands(collector),
            }
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(*clobbers);
        }
        Inst::StackSwitchBasic {
            store_context_ptr,
            load_context_ptr,
            in_payload0,
            out_payload0,
        } => {
            collector.reg_use(load_context_ptr);
            collector.reg_use(store_context_ptr);
            collector.reg_fixed_use(in_payload0, stack_switch::payload_register());
            collector.reg_fixed_def(out_payload0, stack_switch::payload_register());

            let mut clobbers = crate::isa::x64::abi::ALL_CLOBBERS;
            // The return/payload reg must not be included in the clobber set.
            clobbers.remove(
                stack_switch::payload_register()
                    .to_real_reg()
                    .unwrap()
                    .into(),
            );
            collector.reg_clobbers(clobbers);
        }

        Inst::ReturnCallKnown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;
            collector.reg_fixed_def(tmp, regs::r11());
            // Same as in the `Inst::CallKnown` branch.
            debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::ReturnCallUnknown { info } => {
            let ReturnCallInfo {
                dest, uses, tmp, ..
            } = &mut **info;

            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to
            // pick a register that won't be clobbered by the callee-save restore code
            // emitted with a return_call_indirect. r10 is caller-saved, so this should be
            // safe to use.
            collector.reg_fixed_use(dest, regs::r10());

            collector.reg_fixed_def(tmp, regs::r11());
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpTableSeq {
            idx, tmp1, tmp2, ..
        } => {
            collector.reg_use(idx);
            collector.reg_early_def(tmp1);
            // In the sequence emitted for this pseudoinstruction in emit.rs,
            // tmp2 is only written after idx is read, so it doesn't need to be
            // an early def.
            collector.reg_def(tmp2);
        }

        Inst::JmpUnknown { target } => {
            target.get_operands(collector);
        }

        Inst::LoadExtName { dst, .. } => {
            collector.reg_def(dst);
        }

        Inst::LockCmpxchg {
            replacement,
            expected,
            mem,
            dst_old,
            ..
        } => {
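            // `cmpxchg` implicitly compares against %rax and writes the old
            // value back into it, hence the fixed-register constraints.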
            collector.reg_use(replacement);
            collector.reg_fixed_use(expected, regs::rax());
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands(collector);
        }

        Inst::LockCmpxchg16b {
            replacement_low,
            replacement_high,
            expected_low,
            expected_high,
            mem,
            dst_old_low,
            dst_old_high,
            ..
        } => {
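            // `cmpxchg16b` implicitly takes the replacement in rcx:rbx and the
            // expected/old value in rdx:rax.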
            collector.reg_fixed_use(replacement_low, regs::rbx());
            collector.reg_fixed_use(replacement_high, regs::rcx());
            collector.reg_fixed_use(expected_low, regs::rax());
            collector.reg_fixed_use(expected_high, regs::rdx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands(collector);
        }

        Inst::LockXadd {
            operand,
            mem,
            dst_old,
            ..
        } => {
            collector.reg_use(operand);
            collector.reg_reuse_def(dst_old, 0);
            mem.get_operands(collector);
        }

        Inst::Xchg {
            operand,
            mem,
            dst_old,
            ..
        } => {
            collector.reg_use(operand);
            collector.reg_reuse_def(dst_old, 0);
            mem.get_operands(collector);
        }

        Inst::AtomicRmwSeq {
            operand,
            temp,
            dst_old,
            mem,
            ..
        } => {
            collector.reg_late_use(operand);
            collector.reg_early_def(temp);
            // This `fixed_def` is needed because `CMPXCHG` always uses this
            // register implicitly.
            collector.reg_fixed_def(dst_old, regs::rax());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128RmwSeq {
            operand_low,
            operand_high,
            temp_low,
            temp_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_late_use(operand_low);
            collector.reg_late_use(operand_high);
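            // The temps and old-value outputs are pinned to the registers that
            // `cmpxchg16b` uses implicitly (see `LockCmpxchg16b` above): rcx:rbx
            // for the replacement and rdx:rax for the expected/old value.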
            collector.reg_fixed_def(temp_low, regs::rbx());
            collector.reg_fixed_def(temp_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Atomic128XchgSeq {
            operand_low,
            operand_high,
            dst_old_low,
            dst_old_high,
            mem,
            ..
        } => {
            // All registers are collected in the `Late` position so that they don't overlap.
            collector.reg_fixed_late_use(operand_low, regs::rbx());
            collector.reg_fixed_late_use(operand_high, regs::rcx());
            collector.reg_fixed_def(dst_old_low, regs::rax());
            collector.reg_fixed_def(dst_old_high, regs::rdx());
            mem.get_operands_late(collector)
        }

        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }

        Inst::Rets { rets } => {
            // The return value(s) are live-out; we represent this
            // with register uses on the return instruction.
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }

        Inst::JmpKnown { .. }
        | Inst::WinchJmpIf { .. }
        | Inst::JmpCond { .. }
        | Inst::JmpCondOr { .. }
        | Inst::Ret { .. }
        | Inst::Nop { .. }
        | Inst::TrapIf { .. }
        | Inst::TrapIfAnd { .. }
        | Inst::TrapIfOr { .. }
        | Inst::Hlt
        | Inst::Ud2 { .. }
        | Inst::Fence { .. } => {
            // No registers are used.
        }

        Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
            collector.reg_fixed_def(dst, regs::rax());
            // All caller-saves are clobbered.
            //
            // We use the SysV calling convention here because the
            // pseudoinstruction (and relocation that it emits) is specific to
            // ELF systems; other x86-64 targets with other conventions (i.e.,
            // Windows) use different TLS strategies.
            let mut clobbers =
                X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV, false);
            clobbers.remove(regs::gpr_preg(regs::ENC_RAX));
            collector.reg_clobbers(clobbers);
        }

        Inst::CoffTlsGetAddr { dst, tmp, .. } => {
            // We also use the gs register, but that register is not allocatable
            // by the register allocator, so we don't need to mark it as used here.

            // We use %rax to hold the resulting address.
            collector.reg_fixed_def(dst, regs::rax());

            // We use %rcx as a temporary to load the _tls_index.
            collector.reg_fixed_def(tmp, regs::rcx());
        }

        Inst::Unwind { .. } => {}

        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }

        Inst::External { inst } => {
            inst.visit(&mut external::RegallocVisitor { collector });
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = X64ABIMachineSpec;

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        x64_get_operands(self, collector)
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        use asm::inst::Inst as I;
        match self {
            // Note (carefully!) that a 32-bit mov *isn't* a no-op, since it
            // zeroes out the upper 32 bits of the destination. For example, we
            // could conceivably use `movl %reg, %reg` to zero out the top 32
            // bits of %reg.
            Self::MovRR { size, src, dst, .. } if *size == OperandSize::Size64 => {
                Some((dst.to_writable_reg(), src.to_reg()))
            }
            // Note that `movss_a_r` and `movsd_a_r` are specifically omitted
            // here because they only overwrite the low bits of the destination
            // register, preserving the upper bits. That behavior is useful for
            // lane-insertion instructions, for example, so they are not
            // classified as register moves.
            //
            // Otherwise, all register-to-register move instructions that move
            // 128 bits are classified as moves.
            Self::External {
                inst:
                    I::movaps_a(asm::inst::movaps_a { xmm1, xmm_m128 })
                    | I::movups_a(asm::inst::movups_a { xmm1, xmm_m128 })
                    | I::movapd_a(asm::inst::movapd_a { xmm1, xmm_m128 })
                    | I::movupd_a(asm::inst::movupd_a { xmm1, xmm_m128 })
                    | I::movdqa_a(asm::inst::movdqa_a { xmm1, xmm_m128 })
                    | I::movdqu_a(asm::inst::movdqu_a { xmm1, xmm_m128 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(xmm2) => Some((xmm1.as_ref().map(|r| r.to_reg()), xmm2.to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            // In addition to the "A" format of instructions above, also
            // recognize the "B" format which, while it can be used for stores,
            // can also be used for register moves.
            Self::External {
                inst:
                    I::movaps_b(asm::inst::movaps_b { xmm_m128, xmm1 })
                    | I::movups_b(asm::inst::movups_b { xmm_m128, xmm1 })
                    | I::movapd_b(asm::inst::movapd_b { xmm_m128, xmm1 })
                    | I::movupd_b(asm::inst::movupd_b { xmm_m128, xmm1 })
                    | I::movdqa_b(asm::inst::movdqa_b { xmm_m128, xmm1 })
                    | I::movdqu_b(asm::inst::movdqu_b { xmm_m128, xmm1 }),
            } => match xmm_m128 {
                asm::XmmMem::Xmm(dst) => Some((dst.map(|r| r.to_reg()), xmm1.as_ref().to_reg())),
                asm::XmmMem::Mem(_) => None,
            },
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        match self {
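            // `Args` is a pseudo-inst that models the function's live-in
            // registers, so its defs shouldn't count toward the clobber set.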
            &Inst::Args { .. } => false,
            _ => true,
        }
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::Ud2 { .. } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            // Interesting cases.
            &Self::Rets { .. } => MachTerminator::Ret,
            &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => {
                MachTerminator::RetCall
            }
            &Self::JmpKnown { .. } => MachTerminator::Branch,
            &Self::JmpCond { .. } => MachTerminator::Branch,
            &Self::JmpCondOr { .. } => MachTerminator::Branch,
            &Self::JmpTableSeq { .. } => MachTerminator::Branch,
            &Self::CallKnown { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Self::CallUnknown { ref info } if info.try_call_info.is_some() => {
                MachTerminator::Branch
            }
            // All other cases are boring.
            _ => MachTerminator::None,
        }
    }

    fn is_low_level_branch(&self) -> bool {
        match self {
            &Self::WinchJmpIf { .. } => true,
            _ => false,
        }
    }

    fn is_mem_access(&self) -> bool {
        panic!("TODO FILL ME OUT")
    }

    fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
        trace!(
            "Inst::gen_move {:?} -> {:?} (type: {:?})",
            src_reg,
            dst_reg.to_reg(),
            ty
        );
        let rc_dst = dst_reg.to_reg().class();
        let rc_src = src_reg.class();
        // If this isn't true, we have gone way off the rails.
        debug_assert!(rc_dst == rc_src);
        match rc_dst {
            RegClass::Int => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
            RegClass::Float => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't include MOVSS/MOVSD among the zero-latency instructions. Use movaps
                // for those types instead; it may write more lanes than we need, but it is
                // specified to have zero latency.
                let dst_reg = dst_reg.map(|r| Xmm::new(r).unwrap());
                let src_reg = Xmm::new(src_reg).unwrap();
                let inst = match ty {
                    types::F16 | types::F32 | types::F64 | types::F32X4 => {
                        asm::inst::movaps_a::new(dst_reg, src_reg).into()
                    }
                    types::F64X2 => asm::inst::movapd_a::new(dst_reg, src_reg).into(),
                    _ if (ty.is_float() || ty.is_vector()) && ty.bits() <= 128 => {
                        asm::inst::movdqa_a::new(dst_reg, src_reg).into()
                    }
                    _ => unimplemented!("unable to move type: {}", ty),
                };
                Inst::External { inst }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
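        // A single x86_64 nop encoding can be at most 15 bytes long.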
        Inst::nop(std::cmp::min(preferred_size, 15) as u8)
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
            types::F16 => Ok((&[RegClass::Float], &[types::F16])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::F128 => Ok((&[RegClass::Float], &[types::F128])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
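                // A vector of 2, 4, 8, or 16 bytes maps via `ilog2 - 1` to
                // indices 0..=3 of the table above.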
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
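        // XMM registers are modeled by the Float class; a 16-byte vector
        // covers the full register width.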
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(label: MachLabel) -> Inst {
        Inst::jmp_known(label)
    }

    fn gen_imm_u64(value: u64, dst: Writable<Reg>) -> Option<Self> {
        Some(Inst::imm(OperandSize::Size64, value, dst))
    }

    fn gen_imm_f64(value: f64, tmp: Writable<Reg>, dst: Writable<Reg>) -> SmallVec<[Self; 2]> {
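        // Materialize the bit pattern in a GPR first, then move it into the
        // XMM destination with `movq`.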
        let imm_to_gpr = Inst::imm(OperandSize::Size64, value.to_bits(), tmp);
        let gpr_to_xmm = Inst::External {
            inst: asm::inst::movq_a::new(dst.map(|r| Xmm::new(r).unwrap()), tmp.to_reg()).into(),
        };
        smallvec![imm_to_gpr, gpr_to_xmm]
    }

    fn gen_dummy_use(reg: Reg) -> Self {
        Inst::DummyUse { reg }
    }

    fn worst_case_size() -> CodeOffset {
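        // The longest x86_64 instruction encoding is 15 bytes.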
        15
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::CallKnown { .. } | Inst::CallUnknown { .. } => true,
            _ => false,
        }
    }

    fn function_alignment() -> FunctionAlignment {
        FunctionAlignment {
            minimum: 1,
            // Change the alignment from 16 bytes to 32 bytes for better performance.
            // fix-8573: https://github.com/bytecodealliance/wasmtime/issues/8573
            preferred: 32,
        }
    }

    type LabelUse = LabelUse;

    const TRAP_OPCODE: &'static [u8] = &[0x0f, 0x0b];
}

/// Constant state used during emission of a sequence of instructions.
pub struct EmitInfo {
    pub(super) flags: settings::Flags,
    isa_flags: x64_settings::Flags,
}

impl EmitInfo {
    /// Create a constant state for emission of instructions.
    pub fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self { flags, isa_flags }
    }
}

impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
        emit::emit(self, sink, info, state);
    }

    fn pretty_print_inst(&self, _: &mut Self::State) -> String {
        PrettyPrint::pretty_print(self, 0)
    }
}

/// A label-use (internal relocation) in generated code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location. Used for control-flow instructions which consider an offset from the start
    /// of the next instruction (so the 4-byte size of the payload is subtracted from the patched
    /// value).
    JmpRel32,

    /// A 32-bit offset from the location of the relocation itself, added to the existing value at
    /// that location.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    const ALIGN: CodeOffset = 1;

    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
        }
    }

    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
        }
    }

    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
        }
    }

    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        match self {
            LabelUse::JmpRel32 => {
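                // The branch target is relative to the end of the 4-byte
                // payload, so subtract its size from the patched value.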
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
            LabelUse::PCRel32 => {
                let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
                let value = pc_rel.wrapping_add(addend);
                buffer.copy_from_slice(&value.to_le_bytes()[..]);
            }
        }
    }

    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
        }
    }

    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        0
    }

    fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::JmpRel32 | LabelUse::PCRel32 => {
                panic!("Veneer not supported for JmpRel32/PCRel32 label-uses.");
            }
        }
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> {
        match (reloc, addend) {
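            // `X86CallPCRel4` with an addend of -4 matches `JmpRel32`'s
            // convention of offsets taken from the end of the 4-byte payload.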
            (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32),
            _ => None,
        }
    }
}