cranelift_codegen/isa/aarch64/inst/emit.rs

//! AArch64 ISA: binary code emission.

use cranelift_control::ControlPlane;

use crate::binemit::StackMap;
use crate::ir::{self, types::*};
use crate::isa::aarch64::inst::*;
use crate::trace;

/// Memory addressing mode finalization: convert "special" modes (e.g.,
/// generic arbitrary stack offset) into real addressing modes, possibly by
/// emitting some helper instructions that come immediately before the use
/// of this amode.
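///
/// In summary (mirroring the match arms below): an offset that fits in a
/// signed 9-bit immediate becomes `AMode::Unscaled`; one that fits a scaled
/// unsigned 12-bit immediate becomes `AMode::UnsignedOffset`; any other
/// offset is first materialized into the spill temporary register and then
/// addressed via `AMode::RegExtended`.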
pub fn mem_finalize(
    sink: Option<&mut MachBuffer<Inst>>,
    mem: &AMode,
    access_ty: Type,
    state: &EmitState,
) -> (SmallVec<[Inst; 4]>, AMode) {
    match mem {
        &AMode::RegOffset { off, .. }
        | &AMode::SPOffset { off }
        | &AMode::FPOffset { off }
        | &AMode::IncomingArg { off }
        | &AMode::SlotOffset { off } => {
            let basereg = match mem {
                &AMode::RegOffset { rn, .. } => rn,
                &AMode::SPOffset { .. }
                | &AMode::SlotOffset { .. }
                | &AMode::IncomingArg { .. } => stack_reg(),
                &AMode::FPOffset { .. } => fp_reg(),
                _ => unreachable!(),
            };
            let off = match mem {
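                // An incoming-argument offset is measured down from the top
                // of the frame, so it is converted to an SP-relative offset
                // by subtracting it from the sum of all frame regions (see
                // `FrameLayout` for the authoritative description of the
                // frame's regions).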
                &AMode::IncomingArg { .. } => {
                    let frame_layout = state.frame_layout();
                    i64::from(
                        frame_layout.setup_area_size
                            + frame_layout.tail_args_size
                            + frame_layout.clobber_size
                            + frame_layout.fixed_frame_storage_size
                            + frame_layout.outgoing_args_size,
                    ) - off
                }
                &AMode::SlotOffset { .. } => {
                    let adj = i64::from(state.frame_layout().outgoing_args_size);
                    trace!(
                        "mem_finalize: slot offset {} + adj {} -> {}",
                        off,
                        adj,
                        off + adj
                    );
                    off + adj
                }
                _ => off,
            };

            if let Some(simm9) = SImm9::maybe_from_i64(off) {
                let mem = AMode::Unscaled { rn: basereg, simm9 };
                (smallvec![], mem)
            } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {
                let mem = AMode::UnsignedOffset {
                    rn: basereg,
                    uimm12,
                };
                (smallvec![], mem)
            } else {
                let tmp = writable_spilltmp_reg();
                (
                    Inst::load_constant(tmp, off as u64, &mut |_| tmp),
                    AMode::RegExtended {
                        rn: basereg,
                        rm: tmp.to_reg(),
                        extendop: ExtendOp::SXTX,
                    },
                )
            }
        }

        AMode::Const { addr } => {
            let sink = match sink {
                Some(sink) => sink,
                None => return (smallvec![], mem.clone()),
            };
            let label = sink.get_label_for_constant(*addr);
            let label = MemLabel::Mach(label);
            (smallvec![], AMode::Label { label })
        }

        _ => (smallvec![], mem.clone()),
    }
}

//=============================================================================
// Instructions and subcomponents: emission

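/// The lower five bits of a GPR's hardware encoding. Encoding 31 names
/// either XZR or SP, depending on the instruction context.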
pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Int);
    u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}

pub(crate) fn machreg_to_vec(m: Reg) -> u32 {
    assert_eq!(m.class(), RegClass::Float);
    u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
}

fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
    u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}

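// Bit layout assembled below, shared by the three-register arithmetic forms:
// [31:21] = bits_31_21 | [20:16] = Rm | [15:10] = bits_15_10 | [9:5] = Rn |
// [4:0] = Rd.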
pub(crate) fn enc_arith_rrr(
    bits_31_21: u32,
    bits_15_10: u32,
    rd: Writable<Reg>,
    rn: Reg,
    rm: Reg,
) -> u32 {
    (bits_31_21 << 21)
        | (bits_15_10 << 10)
        | machreg_to_gpr(rd.to_reg())
        | (machreg_to_gpr(rn) << 5)
        | (machreg_to_gpr(rm) << 16)
}

fn enc_arith_rr_imm12(
    bits_31_24: u32,
    immshift: u32,
    imm12: u32,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    (bits_31_24 << 24)
        | (immshift << 22)
        | (imm12 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (bit15 << 15)
        | (machreg_to_gpr(ra) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
    assert!(off_26_0 < (1 << 26));
    (op_31_26 << 26) | off_26_0
}

fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
    assert!(off_18_0 < (1 << 19));
    (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
}

fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
    assert!(off_18_0 < (1 << 19));
    assert!(cond < (1 << 4));
    (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
}

fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
    match kind {
        CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
        CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
        CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
    }
}

fn enc_test_bit_and_branch(
    kind: TestBitAndBranchKind,
    taken: BranchTarget,
    reg: Reg,
    bit: u8,
) -> u32 {
    assert!(bit < 64);
    let op_31 = u32::from(bit >> 5);
    let op_23_19 = u32::from(bit & 0b11111);
    let op_30_24 = 0b0110110
        | match kind {
            TestBitAndBranchKind::Z => 0,
            TestBitAndBranchKind::NZ => 1,
        };
    (op_31 << 31)
        | (op_30_24 << 24)
        | (op_23_19 << 19)
        | (taken.as_offset14_or_zero() << 5)
        | machreg_to_gpr(reg)
}

fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    assert!(imm.shift <= 0b11);
    let op = match op {
        MoveWideOp::MovN => 0b00,
        MoveWideOp::MovZ => 0b10,
    };
    0x12800000
        | size.sf_bit() << 31
        | op << 29
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

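// Unlike MOVZ/MOVN above, MOVK inserts its 16-bit immediate at the given
// shift position while leaving the destination's other bits unchanged, which
// is what lets multi-instruction constant sequences patch one 16-bit granule
// at a time.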
fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {
    assert!(imm.shift <= 0b11);
    0x72800000
        | size.sf_bit() << 31
        | u32::from(imm.shift) << 21
        | u32::from(imm.bits) << 5
        | machreg_to_gpr(rd.to_reg())
}

fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm7.bits() << 15)
        | (machreg_to_gpr(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (simm9.bits() << 12)
        | (op_11_10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
    (op_31_22 << 22)
        | (0b1 << 24)
        | (uimm12.bits() << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_reg(
    op_31_22: u32,
    rn: Reg,
    rm: Reg,
    s_bit: bool,
    extendop: Option<ExtendOp>,
    rd: Reg,
) -> u32 {
    let s_bit = if s_bit { 1 } else { 0 };
    let extend_bits = match extendop {
        Some(ExtendOp::UXTW) => 0b010,
        Some(ExtendOp::SXTW) => 0b110,
        Some(ExtendOp::SXTX) => 0b111,
        None => 0b011, // LSL
        _ => panic!("bad extend mode for ld/st AMode"),
    };
    (op_31_22 << 22)
        | (1 << 21)
        | (machreg_to_gpr(rm) << 16)
        | (extend_bits << 13)
        | (s_bit << 12)
        | (0b10 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr_or_vec(rd)
}

pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
    (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
}

fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(size & 0b11, size);
    0b0_0_0011010_10_00000_110_0_00_00000_00000
        | q << 30
        | size << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt.to_reg())
}

fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | simm7.bits() << 15
        | machreg_to_vec(rt2) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_vec(rt)
}

fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (top11 << 21)
        | (machreg_to_vec(rm) << 16)
        | (bit15_10 << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rrr_long(
    q: u32,
    u: u32,
    size: u32,
    bit14: u32,
    rm: Reg,
    rn: Reg,
    rd: Writable<Reg>,
) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bit14 & 0b1, bit14);

    0b0_0_0_01110_00_1_00000_100000_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | bit14 << 14
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
    (0b01011010110 << 21)
        | size << 31
        | opcode2 << 16
        | opcode1 << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_br(rn: Reg) -> u32 {
    0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
}

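// ADR and ADRP encode a 21-bit PC-relative value split into immlo (the low
// two bits, at bits 30:29) and immhi (the remaining 19 bits, at bits 23:5).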
pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {
    let off = u32::try_from(off).unwrap();
    let immlo = off & 3;
    let immhi = (off >> 2) & ((1 << 19) - 1);
    opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
}

pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b00010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {
    let opcode = 0b10010000 << 24;
    enc_adr_inst(opcode, off, rd)
}

fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {
    debug_assert_eq!(op & 0b1, op);
    debug_assert_eq!(o2 & 0b1, o2);
    0b100_11010100_00000_0000_00_00000_00000
        | (op << 30)
        | (machreg_to_gpr(rm) << 16)
        | (cond.bits() << 12)
        | (o2 << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rd.to_reg())
}

fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
    0b000_11110_00_1_00000_0000_11_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
        | (cond.bits() << 12)
}

fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_00_00000_0_0000
        | size.sf_bit() << 31
        | machreg_to_gpr(rm) << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
    0b0_1_1_11010010_00000_0000_10_00000_0_0000
        | size.sf_bit() << 31
        | imm.bits() << 16
        | cond.bits() << 12
        | machreg_to_gpr(rn) << 5
        | nzcv.bits()
}

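/// Encode a bitfield-move instruction (SBFM, BFM, or UBFM, selected by
/// `opc`), the family underlying the shift- and extend-by-immediate aliases.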
fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
    match size {
        OperandSize::Size64 => {
            debug_assert!(immr <= 63);
            debug_assert!(imms <= 63);
        }
        OperandSize::Size32 => {
            debug_assert!(immr <= 31);
            debug_assert!(imms <= 31);
        }
    }
    debug_assert_eq!(opc & 0b11, opc);
    let n_bit = size.sf_bit();
    0b0_00_100110_0_000000_000000_00000_00000
        | size.sf_bit() << 31
        | u32::from(opc) << 29
        | n_bit << 22
        | u32::from(immr) << 16
        | u32::from(imms) << 10
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rd.to_reg())
}

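/// Vector MOV is an alias of ORR with the same source in both operand
/// fields, which is why `rn` appears twice in the encoding below.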
fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
    0b00001110_101_00000_00011_1_00000_00000
        | ((is_16b as u32) << 30)
        | machreg_to_vec(rd.to_reg())
        | (machreg_to_vec(rn) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    (top22 << 10)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
    (top17 << 15)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(ra) << 10)
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
    0b000_11110_00_1_00000_00_1000_00000_00000
        | (size.ftype() << 22)
        | (machreg_to_vec(rm) << 16)
        | (machreg_to_vec(rn) << 5)
}

fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
}

fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(qu & 0b11, qu);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29
        | size << 22
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    0b010_11110_11_11000_11011_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(enc_size & 0b1, enc_size);

    0b0_1_0_01110_00_10000_00_0_10_10_00000_00000
        | u << 29
        | enc_size << 22
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(q & 0b1, q);
    debug_assert_eq!(u & 0b1, u);
    debug_assert_eq!(size & 0b11, size);
    debug_assert_eq!(opcode & 0b11111, opcode);
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}

fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
    debug_assert_eq!(len & 0b11, len);
    0b0_1_001110_000_00000_0_00_0_00_00000_00000
        | (machreg_to_vec(rm) << 16)
        | len << 13
        | (is_extension as u32) << 12
        | (machreg_to_vec(rn) << 5)
        | machreg_to_vec(rd.to_reg())
}

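/// Fixed encoding of `dmb ish` (data memory barrier, inner shareable
/// domain).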
fn enc_dmb_ish() -> u32 {
    0xD5033BBF
}

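/// Encode an LSE atomic read-modify-write with acquire-release semantics
/// (the SWPAL / LD<op>AL family); the old memory value is written to `rt`.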
fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {
    assert!(machreg_to_gpr(rt.to_reg()) != 31);
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    let bit15 = match op {
        AtomicRMWOp::Swp => 0b1,
        _ => 0b0,
    };
    let op = match op {
        AtomicRMWOp::Add => 0b000,
        AtomicRMWOp::Clr => 0b001,
        AtomicRMWOp::Eor => 0b010,
        AtomicRMWOp::Set => 0b011,
        AtomicRMWOp::Smax => 0b100,
        AtomicRMWOp::Smin => 0b101,
        AtomicRMWOp::Umax => 0b110,
        AtomicRMWOp::Umin => 0b111,
        AtomicRMWOp::Swp => 0b000,
    };
    0b00_111_000_111_00000_0_000_00_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs) << 16)
        | bit15 << 15
        | (op << 12)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_1_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_100_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_0_1_0_11111_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt.to_reg())
}

fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    let sz = match ty {
        I64 => 0b11,
        I32 => 0b10,
        I16 => 0b01,
        I8 => 0b00,
        _ => unreachable!(),
    };
    0b00_001000_000_00000_1_11111_00000_00000
        | (sz << 30)
        | (machreg_to_gpr(rs.to_reg()) << 16)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_gpr(rt)
}

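/// Encode a compare-and-swap (the acquire-release `casal` form): `rs` holds
/// the expected value and receives the value actually found in memory, while
/// `rt` holds the replacement value to store on a match.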
fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
    debug_assert_eq!(size & 0b11, size);

    0b00_0010001_1_1_00000_1_11111_00000_00000
        | size << 30
        | machreg_to_gpr(rs.to_reg()) << 16
        | machreg_to_gpr(rn) << 5
        | machreg_to_gpr(rt)
}

fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    let abc = (imm >> 5) as u32;
    let defgh = (imm & 0b11111) as u32;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    0b0_0_0_0111100000_000_0000_01_00000_00000
        | (q_op << 29)
        | (abc << 16)
        | (cmode << 12)
        | (defgh << 5)
        | machreg_to_vec(rd.to_reg())
}

/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Safepoint stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    stack_map: Option<StackMap>,

    /// The user stack map for the upcoming instruction, as provided to
    /// `pre_safepoint()`.
    user_stack_map: Option<ir::UserStackMap>,

    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compile time. See [cranelift_control].
664    ctrl_plane: ControlPlane,
665
666    frame_layout: FrameLayout,
667}
668
669impl MachInstEmitState<Inst> for EmitState {
670    fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
671        EmitState {
672            stack_map: None,
673            user_stack_map: None,
674            ctrl_plane,
675            frame_layout: abi.frame_layout().clone(),
676        }
677    }
678
679    fn pre_safepoint(
680        &mut self,
681        stack_map: Option<StackMap>,
682        user_stack_map: Option<ir::UserStackMap>,
683    ) {
684        self.stack_map = stack_map;
685        self.user_stack_map = user_stack_map;
686    }
687
688    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
689        &mut self.ctrl_plane
690    }
691
692    fn take_ctrl_plane(self) -> ControlPlane {
693        self.ctrl_plane
694    }
695
696    fn frame_layout(&self) -> &FrameLayout {
697        &self.frame_layout
698    }
699}
700
701impl EmitState {
702    fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
703        (self.stack_map.take(), self.user_stack_map.take())
704    }
705
706    fn clear_post_insn(&mut self) {
707        self.stack_map = None;
708    }
709}
710
711/// Constant state used during function compilation.
712pub struct EmitInfo(settings::Flags);
713
714impl EmitInfo {
715    /// Create a constant state for emission of instructions.
716    pub fn new(flags: settings::Flags) -> Self {
717        Self(flags)
718    }
719}
720
721impl MachInstEmit for Inst {
722    type State = EmitState;
723    type Info = EmitInfo;
724
725    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
726        // N.B.: we *must* not exceed the "worst-case size" used to compute
727        // where to insert islands, except when islands are explicitly triggered
728        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
729        // to allow disabling the check for `JTSequence`, which is always
730        // emitted following an `EmitIsland`.
731        let mut start_off = sink.cur_offset();
732
733        match self {
734            &Inst::AluRRR {
735                alu_op,
736                size,
737                rd,
738                rn,
739                rm,
740            } => {
741                debug_assert!(match alu_op {
742                    ALUOp::SDiv | ALUOp::UDiv | ALUOp::SMulH | ALUOp::UMulH =>
743                        size == OperandSize::Size64,
744                    _ => true,
745                });
746                let top11 = match alu_op {
747                    ALUOp::Add => 0b00001011_000,
748                    ALUOp::Adc => 0b00011010_000,
749                    ALUOp::AdcS => 0b00111010_000,
750                    ALUOp::Sub => 0b01001011_000,
751                    ALUOp::Sbc => 0b01011010_000,
752                    ALUOp::SbcS => 0b01111010_000,
753                    ALUOp::Orr => 0b00101010_000,
754                    ALUOp::And => 0b00001010_000,
755                    ALUOp::AndS => 0b01101010_000,
756                    ALUOp::Eor => 0b01001010_000,
757                    ALUOp::OrrNot => 0b00101010_001,
758                    ALUOp::AndNot => 0b00001010_001,
759                    ALUOp::EorNot => 0b01001010_001,
760                    ALUOp::AddS => 0b00101011_000,
761                    ALUOp::SubS => 0b01101011_000,
762                    ALUOp::SDiv => 0b10011010_110,
763                    ALUOp::UDiv => 0b10011010_110,
764                    ALUOp::RotR | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,
765                    ALUOp::SMulH => 0b10011011_010,
766                    ALUOp::UMulH => 0b10011011_110,
767                };
768                let top11 = top11 | size.sf_bit() << 10;
769                let bit15_10 = match alu_op {
770                    ALUOp::SDiv => 0b000011,
771                    ALUOp::UDiv => 0b000010,
772                    ALUOp::RotR => 0b001011,
773                    ALUOp::Lsr => 0b001001,
774                    ALUOp::Asr => 0b001010,
775                    ALUOp::Lsl => 0b001000,
776                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
777                    _ => 0b000000,
778                };
779                debug_assert_ne!(writable_stack_reg(), rd);
780                // The stack pointer is the zero register in this context, so this might be an
781                // indication that something is wrong.
782                debug_assert_ne!(stack_reg(), rn);
783                debug_assert_ne!(stack_reg(), rm);
784                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
785            }
786            &Inst::AluRRRR {
787                alu_op,
788                size,
789                rd,
790                rm,
791                rn,
792                ra,
793            } => {
794                let (top11, bit15) = match alu_op {
795                    ALUOp3::MAdd => (0b0_00_11011_000, 0),
796                    ALUOp3::MSub => (0b0_00_11011_000, 1),
797                    ALUOp3::UMAddL => {
798                        debug_assert!(size == OperandSize::Size32);
799                        (0b1_00_11011_1_01, 0)
800                    }
801                    ALUOp3::SMAddL => {
802                        debug_assert!(size == OperandSize::Size32);
803                        (0b1_00_11011_0_01, 0)
804                    }
805                };
806                let top11 = top11 | size.sf_bit() << 10;
807                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
808            }
809            &Inst::AluRRImm12 {
810                alu_op,
811                size,
812                rd,
813                rn,
814                ref imm12,
815            } => {
816                let top8 = match alu_op {
817                    ALUOp::Add => 0b000_10001,
818                    ALUOp::Sub => 0b010_10001,
819                    ALUOp::AddS => 0b001_10001,
820                    ALUOp::SubS => 0b011_10001,
821                    _ => unimplemented!("{:?}", alu_op),
822                };
823                let top8 = top8 | size.sf_bit() << 7;
824                sink.put4(enc_arith_rr_imm12(
825                    top8,
826                    imm12.shift_bits(),
827                    imm12.imm_bits(),
828                    rn,
829                    rd,
830                ));
831            }
832            &Inst::AluRRImmLogic {
833                alu_op,
834                size,
835                rd,
836                rn,
837                ref imml,
838            } => {
839                let (top9, inv) = match alu_op {
840                    ALUOp::Orr => (0b001_100100, false),
841                    ALUOp::And => (0b000_100100, false),
842                    ALUOp::AndS => (0b011_100100, false),
843                    ALUOp::Eor => (0b010_100100, false),
844                    ALUOp::OrrNot => (0b001_100100, true),
845                    ALUOp::AndNot => (0b000_100100, true),
846                    ALUOp::EorNot => (0b010_100100, true),
847                    _ => unimplemented!("{:?}", alu_op),
848                };
849                let top9 = top9 | size.sf_bit() << 8;
850                let imml = if inv { imml.invert() } else { *imml };
851                sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
852            }
853
854            &Inst::AluRRImmShift {
855                alu_op,
856                size,
857                rd,
858                rn,
859                ref immshift,
860            } => {
861                let amt = immshift.value();
862                let (top10, immr, imms) = match alu_op {
863                    ALUOp::RotR => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
864                    ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),
865                    ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),
866                    ALUOp::Lsl => {
867                        let bits = if size.is64() { 64 } else { 32 };
868                        (
869                            0b0101001100,
870                            u32::from((bits - amt) % bits),
871                            u32::from(bits - 1 - amt),
872                        )
873                    }
874                    _ => unimplemented!("{:?}", alu_op),
875                };
876                let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();
877                let imms = match alu_op {
878                    ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,
879                    _ => imms,
880                };
881                sink.put4(
882                    (top10 << 22)
883                        | (immr << 16)
884                        | (imms << 10)
885                        | (machreg_to_gpr(rn) << 5)
886                        | machreg_to_gpr(rd.to_reg()),
887                );
888            }
889
890            &Inst::AluRRRShift {
891                alu_op,
892                size,
893                rd,
894                rn,
895                rm,
896                ref shiftop,
897            } => {
898                let top11: u32 = match alu_op {
899                    ALUOp::Add => 0b000_01011000,
900                    ALUOp::AddS => 0b001_01011000,
901                    ALUOp::Sub => 0b010_01011000,
902                    ALUOp::SubS => 0b011_01011000,
903                    ALUOp::Orr => 0b001_01010000,
904                    ALUOp::And => 0b000_01010000,
905                    ALUOp::AndS => 0b011_01010000,
906                    ALUOp::Eor => 0b010_01010000,
907                    ALUOp::OrrNot => 0b001_01010001,
908                    ALUOp::EorNot => 0b010_01010001,
909                    ALUOp::AndNot => 0b000_01010001,
910                    _ => unimplemented!("{:?}", alu_op),
911                };
912                let top11 = top11 | size.sf_bit() << 10;
913                let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
914                let bits_15_10 = u32::from(shiftop.amt().value());
915                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
916            }
917
918            &Inst::AluRRRExtend {
919                alu_op,
920                size,
921                rd,
922                rn,
923                rm,
924                extendop,
925            } => {
926                let top11: u32 = match alu_op {
927                    ALUOp::Add => 0b00001011001,
928                    ALUOp::Sub => 0b01001011001,
929                    ALUOp::AddS => 0b00101011001,
930                    ALUOp::SubS => 0b01101011001,
931                    _ => unimplemented!("{:?}", alu_op),
932                };
933                let top11 = top11 | size.sf_bit() << 10;
934                let bits_15_10 = u32::from(extendop.bits()) << 3;
935                sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
936            }
937
938            &Inst::BitRR {
939                op, size, rd, rn, ..
940            } => {
941                let (op1, op2) = match op {
942                    BitOp::RBit => (0b00000, 0b000000),
943                    BitOp::Clz => (0b00000, 0b000100),
944                    BitOp::Cls => (0b00000, 0b000101),
945                    BitOp::Rev16 => (0b00000, 0b000001),
946                    BitOp::Rev32 => (0b00000, 0b000010),
947                    BitOp::Rev64 => (0b00000, 0b000011),
948                };
949                sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
950            }
951
952            &Inst::ULoad8 { rd, ref mem, flags }
953            | &Inst::SLoad8 { rd, ref mem, flags }
954            | &Inst::ULoad16 { rd, ref mem, flags }
955            | &Inst::SLoad16 { rd, ref mem, flags }
956            | &Inst::ULoad32 { rd, ref mem, flags }
957            | &Inst::SLoad32 { rd, ref mem, flags }
958            | &Inst::ULoad64 {
959                rd, ref mem, flags, ..
960            }
961            | &Inst::FpuLoad32 { rd, ref mem, flags }
962            | &Inst::FpuLoad64 { rd, ref mem, flags }
963            | &Inst::FpuLoad128 { rd, ref mem, flags } => {
964                let mem = mem.clone();
965                let access_ty = self.mem_type().unwrap();
966                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
967
968                for inst in mem_insts.into_iter() {
969                    inst.emit(sink, emit_info, state);
970                }
971
972                // ldst encoding helpers take Reg, not Writable<Reg>.
973                let rd = rd.to_reg();
974
975                // This is the base opcode (top 10 bits) for the "unscaled
976                // immediate" form (Unscaled). Other addressing modes will OR in
977                // other values for bits 24/25 (bits 1/2 of this constant).
978                let op = match self {
979                    Inst::ULoad8 { .. } => 0b0011100001,
980                    Inst::SLoad8 { .. } => 0b0011100010,
981                    Inst::ULoad16 { .. } => 0b0111100001,
982                    Inst::SLoad16 { .. } => 0b0111100010,
983                    Inst::ULoad32 { .. } => 0b1011100001,
984                    Inst::SLoad32 { .. } => 0b1011100010,
985                    Inst::ULoad64 { .. } => 0b1111100001,
986                    Inst::FpuLoad32 { .. } => 0b1011110001,
987                    Inst::FpuLoad64 { .. } => 0b1111110001,
988                    Inst::FpuLoad128 { .. } => 0b0011110011,
989                    _ => unreachable!(),
990                };
991
992                if let Some(trap_code) = flags.trap_code() {
993                    // Register the offset at which the actual load instruction starts.
994                    sink.add_trap(trap_code);
995                }
996
997                match &mem {
998                    &AMode::Unscaled { rn, simm9 } => {
999                        let reg = rn;
1000                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1001                    }
1002                    &AMode::UnsignedOffset { rn, uimm12 } => {
1003                        let reg = rn;
1004                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1005                    }
1006                    &AMode::RegReg { rn, rm } => {
1007                        let r1 = rn;
1008                        let r2 = rm;
1009                        sink.put4(enc_ldst_reg(
1010                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1011                        ));
1012                    }
1013                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1014                        let r1 = rn;
1015                        let r2 = rm;
1016                        let extendop = match &mem {
1017                            &AMode::RegScaled { .. } => None,
1018                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1019                            _ => unreachable!(),
1020                        };
1021                        sink.put4(enc_ldst_reg(
1022                            op, r1, r2, /* scaled = */ true, extendop, rd,
1023                        ));
1024                    }
1025                    &AMode::RegExtended { rn, rm, extendop } => {
1026                        let r1 = rn;
1027                        let r2 = rm;
1028                        sink.put4(enc_ldst_reg(
1029                            op,
1030                            r1,
1031                            r2,
1032                            /* scaled = */ false,
1033                            Some(extendop),
1034                            rd,
1035                        ));
1036                    }
1037                    &AMode::Label { ref label } => {
1038                        let offset = match label {
1039                            // cast i32 to u32 (two's-complement)
1040                            MemLabel::PCRel(off) => *off as u32,
1041                            // Emit a relocation into the `MachBuffer`
1042                            // for the label that's being loaded from and
1043                            // encode an address of 0 in its place which will
1044                            // get filled in by relocation resolution later on.
1045                            MemLabel::Mach(label) => {
1046                                sink.use_label_at_offset(
1047                                    sink.cur_offset(),
1048                                    *label,
1049                                    LabelUse::Ldr19,
1050                                );
1051                                0
1052                            }
1053                        } / 4;
1054                        assert!(offset < (1 << 19));
1055                        match self {
1056                            &Inst::ULoad32 { .. } => {
1057                                sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
1058                            }
1059                            &Inst::SLoad32 { .. } => {
1060                                sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
1061                            }
1062                            &Inst::FpuLoad32 { .. } => {
1063                                sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
1064                            }
1065                            &Inst::ULoad64 { .. } => {
1066                                sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
1067                            }
1068                            &Inst::FpuLoad64 { .. } => {
1069                                sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
1070                            }
1071                            &Inst::FpuLoad128 { .. } => {
1072                                sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
1073                            }
1074                            _ => panic!("Unsupported size for LDR from constant pool!"),
1075                        }
1076                    }
1077                    &AMode::SPPreIndexed { simm9 } => {
1078                        let reg = stack_reg();
1079                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1080                    }
1081                    &AMode::SPPostIndexed { simm9 } => {
1082                        let reg = stack_reg();
1083                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1084                    }
1085                    // Eliminated by `mem_finalize()` above.
1086                    &AMode::SPOffset { .. }
1087                    | &AMode::FPOffset { .. }
1088                    | &AMode::IncomingArg { .. }
1089                    | &AMode::SlotOffset { .. }
1090                    | &AMode::Const { .. }
1091                    | &AMode::RegOffset { .. } => {
1092                        panic!("Should not see {:?} here!", mem)
1093                    }
1094                }
1095            }
1096
1097            &Inst::Store8 { rd, ref mem, flags }
1098            | &Inst::Store16 { rd, ref mem, flags }
1099            | &Inst::Store32 { rd, ref mem, flags }
1100            | &Inst::Store64 { rd, ref mem, flags }
1101            | &Inst::FpuStore32 { rd, ref mem, flags }
1102            | &Inst::FpuStore64 { rd, ref mem, flags }
1103            | &Inst::FpuStore128 { rd, ref mem, flags } => {
1104                let mem = mem.clone();
1105                let access_ty = self.mem_type().unwrap();
1106                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);
1107
1108                for inst in mem_insts.into_iter() {
1109                    inst.emit(sink, emit_info, state);
1110                }
1111
1112                let op = match self {
1113                    Inst::Store8 { .. } => 0b0011100000,
1114                    Inst::Store16 { .. } => 0b0111100000,
1115                    Inst::Store32 { .. } => 0b1011100000,
1116                    Inst::Store64 { .. } => 0b1111100000,
1117                    Inst::FpuStore32 { .. } => 0b1011110000,
1118                    Inst::FpuStore64 { .. } => 0b1111110000,
1119                    Inst::FpuStore128 { .. } => 0b0011110010,
1120                    _ => unreachable!(),
1121                };
1122
1123                if let Some(trap_code) = flags.trap_code() {
1124                    // Register the offset at which the actual store instruction starts.
1125                    sink.add_trap(trap_code);
1126                }
1127
1128                match &mem {
1129                    &AMode::Unscaled { rn, simm9 } => {
1130                        let reg = rn;
1131                        sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
1132                    }
1133                    &AMode::UnsignedOffset { rn, uimm12 } => {
1134                        let reg = rn;
1135                        sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));
1136                    }
1137                    &AMode::RegReg { rn, rm } => {
1138                        let r1 = rn;
1139                        let r2 = rm;
1140                        sink.put4(enc_ldst_reg(
1141                            op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
1142                        ));
1143                    }
1144                    &AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {
1145                        let r1 = rn;
1146                        let r2 = rm;
1147                        let extendop = match &mem {
1148                            &AMode::RegScaled { .. } => None,
1149                            &AMode::RegScaledExtended { extendop, .. } => Some(extendop),
1150                            _ => unreachable!(),
1151                        };
1152                        sink.put4(enc_ldst_reg(
1153                            op, r1, r2, /* scaled = */ true, extendop, rd,
1154                        ));
1155                    }
1156                    &AMode::RegExtended { rn, rm, extendop } => {
1157                        let r1 = rn;
1158                        let r2 = rm;
1159                        sink.put4(enc_ldst_reg(
1160                            op,
1161                            r1,
1162                            r2,
1163                            /* scaled = */ false,
1164                            Some(extendop),
1165                            rd,
1166                        ));
1167                    }
1168                    &AMode::Label { .. } => {
1169                        panic!("Store to a MemLabel not implemented!");
1170                    }
1171                    &AMode::SPPreIndexed { simm9 } => {
1172                        let reg = stack_reg();
1173                        sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));
1174                    }
1175                    &AMode::SPPostIndexed { simm9 } => {
1176                        let reg = stack_reg();
1177                        sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));
1178                    }
1179                    // Eliminated by `mem_finalize()` above.
1180                    &AMode::SPOffset { .. }
1181                    | &AMode::FPOffset { .. }
1182                    | &AMode::IncomingArg { .. }
1183                    | &AMode::SlotOffset { .. }
1184                    | &AMode::Const { .. }
1185                    | &AMode::RegOffset { .. } => {
1186                        panic!("Should not see {:?} here!", mem)
1187                    }
1188                }
1189            }
1190
1191            &Inst::StoreP64 {
1192                rt,
1193                rt2,
1194                ref mem,
1195                flags,
1196            } => {
1197                let mem = mem.clone();
1198                if let Some(trap_code) = flags.trap_code() {
1199                    // Register the offset at which the actual store instruction starts.
1200                    sink.add_trap(trap_code);
1201                }
1202                match &mem {
1203                    &PairAMode::SignedOffset { reg, simm7 } => {
1204                        assert_eq!(simm7.scale_ty, I64);
1205                        sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
1206                    }
1207                    &PairAMode::SPPreIndexed { simm7 } => {
1208                        assert_eq!(simm7.scale_ty, I64);
1209                        let reg = stack_reg();
1210                        sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));
1211                    }
1212                    &PairAMode::SPPostIndexed { simm7 } => {
1213                        assert_eq!(simm7.scale_ty, I64);
1214                        let reg = stack_reg();
1215                        sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));
1216                    }
1217                }
1218            }
1219            &Inst::LoadP64 {
1220                rt,
1221                rt2,
1222                ref mem,
1223                flags,
1224            } => {
1225                let rt = rt.to_reg();
1226                let rt2 = rt2.to_reg();
1227                let mem = mem.clone();
1228                if let Some(trap_code) = flags.trap_code() {
1229                    // Register the offset at which the actual load instruction starts.
1230                    sink.add_trap(trap_code);
1231                }
1232
1233                match &mem {
1234                    &PairAMode::SignedOffset { reg, simm7 } => {
1235                        assert_eq!(simm7.scale_ty, I64);
1236                        sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
1237                    }
1238                    &PairAMode::SPPreIndexed { simm7 } => {
1239                        assert_eq!(simm7.scale_ty, I64);
1240                        let reg = stack_reg();
1241                        sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));
1242                    }
1243                    &PairAMode::SPPostIndexed { simm7 } => {
1244                        assert_eq!(simm7.scale_ty, I64);
1245                        let reg = stack_reg();
1246                        sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));
1247                    }
1248                }
1249            }
1250            &Inst::FpuLoadP64 {
1251                rt,
1252                rt2,
1253                ref mem,
1254                flags,
1255            }
1256            | &Inst::FpuLoadP128 {
1257                rt,
1258                rt2,
1259                ref mem,
1260                flags,
1261            } => {
1262                let rt = rt.to_reg();
1263                let rt2 = rt2.to_reg();
1264                let mem = mem.clone();
1265
1266                if let Some(trap_code) = flags.trap_code() {
1267                    // Register the offset at which the actual load instruction starts.
1268                    sink.add_trap(trap_code);
1269                }
1270
1271                let opc = match self {
1272                    &Inst::FpuLoadP64 { .. } => 0b01,
1273                    &Inst::FpuLoadP128 { .. } => 0b10,
1274                    _ => unreachable!(),
1275                };
1276
1277                match &mem {
1278                    &PairAMode::SignedOffset { reg, simm7 } => {
1279                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1280                        sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
1281                    }
1282                    &PairAMode::SPPreIndexed { simm7 } => {
1283                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1284                        let reg = stack_reg();
1285                        sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));
1286                    }
1287                    &PairAMode::SPPostIndexed { simm7 } => {
1288                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1289                        let reg = stack_reg();
1290                        sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));
1291                    }
1292                }
1293            }
1294            &Inst::FpuStoreP64 {
1295                rt,
1296                rt2,
1297                ref mem,
1298                flags,
1299            }
1300            | &Inst::FpuStoreP128 {
1301                rt,
1302                rt2,
1303                ref mem,
1304                flags,
1305            } => {
1306                let mem = mem.clone();
1307
1308                if let Some(trap_code) = flags.trap_code() {
1309                    // Register the offset at which the actual store instruction starts.
1310                    sink.add_trap(trap_code);
1311                }
1312
1313                let opc = match self {
1314                    &Inst::FpuStoreP64 { .. } => 0b01,
1315                    &Inst::FpuStoreP128 { .. } => 0b10,
1316                    _ => unreachable!(),
1317                };
1318
1319                match &mem {
1320                    &PairAMode::SignedOffset { reg, simm7 } => {
1321                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1322                        sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
1323                    }
1324                    &PairAMode::SPPreIndexed { simm7 } => {
1325                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1326                        let reg = stack_reg();
1327                        sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));
1328                    }
1329                    &PairAMode::SPPostIndexed { simm7 } => {
1330                        assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
1331                        let reg = stack_reg();
1332                        sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));
1333                    }
1334                }
1335            }
1336            &Inst::Mov { size, rd, rm } => {
1337                assert!(rd.to_reg().class() == rm.class());
1338                assert!(rm.class() == RegClass::Int);
1339
1340                match size {
1341                    OperandSize::Size64 => {
1342                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1343                        // should never MOV to XZR.
1344                        assert!(rd.to_reg() != stack_reg());
1345
1346                        if rm == stack_reg() {
1347                            // We can't use ORR here, so use an `add rd, sp, #0` instead.
1348                            let imm12 = Imm12::maybe_from_u64(0).unwrap();
1349                            sink.put4(enc_arith_rr_imm12(
1350                                0b100_10001,
1351                                imm12.shift_bits(),
1352                                imm12.imm_bits(),
1353                                rm,
1354                                rd,
1355                            ));
1356                        } else {
1357                            // Encoded as ORR rd, rm, zero.
1358                            sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
1359                        }
1360                    }
1361                    OperandSize::Size32 => {
1362                        // MOV to SP is interpreted as MOV to XZR instead. And our codegen
1363                        // should never MOV to XZR.
1364                        assert!(machreg_to_gpr(rd.to_reg()) != 31);
1365                        // Encoded as ORR rd, rm, zero.
1366                        sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
1367                    }
1368                }
1369            }
1370            &Inst::MovFromPReg { rd, rm } => {
1371                let rm: Reg = rm.into();
1372                debug_assert!([
1373                    regs::fp_reg(),
1374                    regs::stack_reg(),
1375                    regs::link_reg(),
1376                    regs::pinned_reg()
1377                ]
1378                .contains(&rm));
1379                assert!(rm.class() == RegClass::Int);
1380                assert!(rd.to_reg().class() == rm.class());
1381                let size = OperandSize::Size64;
1382                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1383            }
1384            &Inst::MovToPReg { rd, rm } => {
1385                let rd: Writable<Reg> = Writable::from_reg(rd.into());
1386                debug_assert!([
1387                    regs::fp_reg(),
1388                    regs::stack_reg(),
1389                    regs::link_reg(),
1390                    regs::pinned_reg()
1391                ]
1392                .contains(&rd.to_reg()));
1393                assert!(rd.to_reg().class() == RegClass::Int);
1394                assert!(rm.class() == rd.to_reg().class());
1395                let size = OperandSize::Size64;
1396                Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);
1397            }
1398            &Inst::MovWide { op, rd, imm, size } => {
1399                sink.put4(enc_move_wide(op, rd, imm, size));
1400            }
1401            &Inst::MovK { rd, rn, imm, size } => {
1402                debug_assert_eq!(rn, rd.to_reg());
1403                sink.put4(enc_movk(rd, imm, size));
1404            }
1405            &Inst::CSel { rd, rn, rm, cond } => {
1406                sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));
1407            }
1408            &Inst::CSNeg { rd, rn, rm, cond } => {
1409                sink.put4(enc_csel(rd, rn, rm, cond, 1, 1));
1410            }
1411            &Inst::CSet { rd, cond } => {
1412                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));
1413            }
1414            &Inst::CSetm { rd, cond } => {
1415                sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));
1416            }
1417            &Inst::CCmp {
1418                size,
1419                rn,
1420                rm,
1421                nzcv,
1422                cond,
1423            } => {
1424                sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));
1425            }
1426            &Inst::CCmpImm {
1427                size,
1428                rn,
1429                imm,
1430                nzcv,
1431                cond,
1432            } => {
1433                sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
1434            }
1435            &Inst::AtomicRMW {
1436                ty,
1437                op,
1438                rs,
1439                rt,
1440                rn,
1441                flags,
1442            } => {
1443                if let Some(trap_code) = flags.trap_code() {
1444                    sink.add_trap(trap_code);
1445                }
1446
1447                sink.put4(enc_acq_rel(ty, op, rs, rt, rn));
1448            }
1449            &Inst::AtomicRMWLoop { ty, op, flags, .. } => {
1450                /* Emit this:
1451                     again:
1452                      ldaxr{,b,h}  x/w27, [x25]
1453                      // maybe sign extend
1454                      op          x28, x27, x26 // op: add/sub/and/orr/eor (nand, min/max, xchg expand specially below)
1455                      stlxr{,b,h}  w24, x/w28, [x25]
1456                      cbnz        x24, again
1457
1458                   Operand conventions:
1459                      IN:  x25 (addr), x26 (2nd arg for op)
1460                      OUT: x27 (old value), x24 (trashed), x28 (trashed)
1461
1462                   Unfortunately, per the ARM documentation, x28 cannot be used for both
1463                   the store-data and success-flag operands of stlxr: doing so would make
1464                   the instruction's behaviour CONSTRAINED UNPREDICTABLE. We therefore use
1465                   x24 for the success flag.
1466                */
1467                // TODO: We should not hardcode registers here; a better approach would be to
1468                // pass scratch registers in the AtomicRMWLoop pseudo-instruction and use those.
1469                let xzr = zero_reg();
1470                let x24 = xreg(24);
1471                let x25 = xreg(25);
1472                let x26 = xreg(26);
1473                let x27 = xreg(27);
1474                let x28 = xreg(28);
1475                let x24wr = writable_xreg(24);
1476                let x27wr = writable_xreg(27);
1477                let x28wr = writable_xreg(28);
1478                let again_label = sink.get_label();
1479
1480                // again:
1481                sink.bind_label(again_label, &mut state.ctrl_plane);
1482
1483                if let Some(trap_code) = flags.trap_code() {
1484                    sink.add_trap(trap_code);
1485                }
1486
1487                sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]
1488                let size = OperandSize::from_ty(ty);
1489                let sign_ext = match op {
1490                    AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {
1491                        I16 => Some((ExtendOp::SXTH, 16)),
1492                        I8 => Some((ExtendOp::SXTB, 8)),
1493                        _ => None,
1494                    },
1495                    _ => None,
1496                };
1497
1498                // sxt{b|h} the loaded result if necessary.
1499                if let Some((_, from_bits)) = sign_ext {
1501                    Inst::Extend {
1502                        rd: x27wr,
1503                        rn: x27,
1504                        signed: true,
1505                        from_bits,
1506                        to_bits: size.bits(),
1507                    }
1508                    .emit(sink, emit_info, state);
1509                }
1510
1511                match op {
1512                    AtomicRMWLoopOp::Xchg => {} // do nothing
1513                    AtomicRMWLoopOp::Nand => {
1514                        // and x28, x27, x26
1515                        // mvn x28, x28
1516
1517                        Inst::AluRRR {
1518                            alu_op: ALUOp::And,
1519                            size,
1520                            rd: x28wr,
1521                            rn: x27,
1522                            rm: x26,
1523                        }
1524                        .emit(sink, emit_info, state);
1525
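                            // OrrNot with the zero register as rn is the mvn alias (bitwise NOT).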
1526                        Inst::AluRRR {
1527                            alu_op: ALUOp::OrrNot,
1528                            size,
1529                            rd: x28wr,
1530                            rn: xzr,
1531                            rm: x28,
1532                        }
1533                        .emit(sink, emit_info, state);
1534                    }
1535                    AtomicRMWLoopOp::Umin
1536                    | AtomicRMWLoopOp::Umax
1537                    | AtomicRMWLoopOp::Smin
1538                    | AtomicRMWLoopOp::Smax => {
1539                        // cmp x27, x26 {?sxt}
1540                        // csel.op x28, x27, x26
1541
1542                        let cond = match op {
1543                            AtomicRMWLoopOp::Umin => Cond::Lo,
1544                            AtomicRMWLoopOp::Umax => Cond::Hi,
1545                            AtomicRMWLoopOp::Smin => Cond::Lt,
1546                            AtomicRMWLoopOp::Smax => Cond::Gt,
1547                            _ => unreachable!(),
1548                        };
1549
1550                        if let Some((extendop, _)) = sign_ext {
1552                            Inst::AluRRRExtend {
1553                                alu_op: ALUOp::SubS,
1554                                size,
1555                                rd: writable_zero_reg(),
1556                                rn: x27,
1557                                rm: x26,
1558                                extendop,
1559                            }
1560                            .emit(sink, emit_info, state);
1561                        } else {
1562                            Inst::AluRRR {
1563                                alu_op: ALUOp::SubS,
1564                                size,
1565                                rd: writable_zero_reg(),
1566                                rn: x27,
1567                                rm: x26,
1568                            }
1569                            .emit(sink, emit_info, state);
1570                        }
1571
1572                        Inst::CSel {
1573                            cond,
1574                            rd: x28wr,
1575                            rn: x27,
1576                            rm: x26,
1577                        }
1578                        .emit(sink, emit_info, state);
1579                    }
1580                    _ => {
1581                        // add/sub/and/orr/eor x28, x27, x26
1582                        let alu_op = match op {
1583                            AtomicRMWLoopOp::Add => ALUOp::Add,
1584                            AtomicRMWLoopOp::Sub => ALUOp::Sub,
1585                            AtomicRMWLoopOp::And => ALUOp::And,
1586                            AtomicRMWLoopOp::Orr => ALUOp::Orr,
1587                            AtomicRMWLoopOp::Eor => ALUOp::Eor,
1588                            AtomicRMWLoopOp::Nand
1589                            | AtomicRMWLoopOp::Umin
1590                            | AtomicRMWLoopOp::Umax
1591                            | AtomicRMWLoopOp::Smin
1592                            | AtomicRMWLoopOp::Smax
1593                            | AtomicRMWLoopOp::Xchg => unreachable!(),
1594                        };
1595
1596                        Inst::AluRRR {
1597                            alu_op,
1598                            size,
1599                            rd: x28wr,
1600                            rn: x27,
1601                            rm: x26,
1602                        }
1603                        .emit(sink, emit_info, state);
1604                    }
1605                }
1606
1607                if let Some(trap_code) = flags.trap_code() {
1608                    sink.add_trap(trap_code);
1609                }
1610                if op == AtomicRMWLoopOp::Xchg {
1611                    sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]
1612                } else {
1613                    sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1614                }
1615
1616                // cbnz w24, again
1617                // Note that we actually test x24, relying on `stlxr` zero-extending its
1618                // 32-bit status result into the full 64-bit register.
1619                let br_offset = sink.cur_offset();
1620                sink.put4(enc_conditional_br(
1621                    BranchTarget::Label(again_label),
1622                    CondBrKind::NotZero(x24),
1623                ));
1624                sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
1625            }
1626            &Inst::AtomicCAS {
1627                rd,
1628                rs,
1629                rt,
1630                rn,
1631                ty,
1632                flags,
1633            } => {
1634                debug_assert_eq!(rd.to_reg(), rs);
1635                let size = match ty {
1636                    I8 => 0b00,
1637                    I16 => 0b01,
1638                    I32 => 0b10,
1639                    I64 => 0b11,
1640                    _ => panic!("Unsupported type: {}", ty),
1641                };
1642
1643                if let Some(trap_code) = flags.trap_code() {
1644                    sink.add_trap(trap_code);
1645                }
1646
1647                sink.put4(enc_cas(size, rd, rt, rn));
1648            }
1649            &Inst::AtomicCASLoop { ty, flags, .. } => {
1650                /* Emit this:
1651                    again:
1652                     ldaxr{,b,h} x/w27, [x25]
1653                     cmp         x27, x/w26 uxt{b,h}
1654                     b.ne        out
1655                     stlxr{,b,h} w24, x/w28, [x25]
1656                     cbnz        x24, again
1657                    out:
1658
1659                  Operand conventions:
1660                     IN:  x25 (addr), x26 (expected value), x28 (replacement value)
1661                     OUT: x27 (old value), x24 (trashed)
1662                */
1663                let x24 = xreg(24);
1664                let x25 = xreg(25);
1665                let x26 = xreg(26);
1666                let x27 = xreg(27);
1667                let x28 = xreg(28);
1668                let xzrwr = writable_zero_reg();
1669                let x24wr = writable_xreg(24);
1670                let x27wr = writable_xreg(27);
1671                let again_label = sink.get_label();
1672                let out_label = sink.get_label();
1673
1674                // again:
1675                sink.bind_label(again_label, &mut state.ctrl_plane);
1676
1677                if let Some(trap_code) = flags.trap_code() {
1678                    sink.add_trap(trap_code);
1679                }
1680
1681                // ldaxr x27, [x25]
1682                sink.put4(enc_ldaxr(ty, x27wr, x25));
1683
1684                // `ldaxr` zero-extends its result into the full 64-bit register, so we
1685                // don't need UXTW here; comparing the x-forms of the registers suffices.
1686                let (bit21, extend_op) = match ty {
1687                    I8 => (0b1, 0b000000),
1688                    I16 => (0b1, 0b001000),
1689                    _ => (0b0, 0b000000),
1690                };
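                    // Bit 21 selects the extended-register form of SUBS; the option field
                    // supplies UXTB/UXTH to zero-extend the expected value for sub-word
                    // comparisons.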
1691                let bits_31_21 = 0b111_01011_000 | bit21;
1692                // cmp x27, x26 (== subs xzr, x27, x26)
1693                sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));
1694
1695                // b.ne out
1696                let br_out_offset = sink.cur_offset();
1697                sink.put4(enc_conditional_br(
1698                    BranchTarget::Label(out_label),
1699                    CondBrKind::Cond(Cond::Ne),
1700                ));
1701                sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
1702
1703                if let Some(trap_code) = flags.trap_code() {
1704                    sink.add_trap(trap_code);
1705                }
1706
1707                sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]
1708
1709                // cbnz w24, again.
1710                // Note that we actually test x24, relying on `stlxr` zero-extending its
1711                // 32-bit status result into the full 64-bit register.
1712                let br_again_offset = sink.cur_offset();
1713                sink.put4(enc_conditional_br(
1714                    BranchTarget::Label(again_label),
1715                    CondBrKind::NotZero(x24),
1716                ));
1717                sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
1718
1719                // out:
1720                sink.bind_label(out_label, &mut state.ctrl_plane);
1721            }
1722            &Inst::LoadAcquire {
1723                access_ty,
1724                rt,
1725                rn,
1726                flags,
1727            } => {
1728                if let Some(trap_code) = flags.trap_code() {
1729                    sink.add_trap(trap_code);
1730                }
1731
1732                sink.put4(enc_ldar(access_ty, rt, rn));
1733            }
1734            &Inst::StoreRelease {
1735                access_ty,
1736                rt,
1737                rn,
1738                flags,
1739            } => {
1740                if let Some(trap_code) = flags.trap_code() {
1741                    sink.add_trap(trap_code);
1742                }
1743
1744                sink.put4(enc_stlr(access_ty, rt, rn));
1745            }
1746            &Inst::Fence {} => {
1747                sink.put4(enc_dmb_ish()); // dmb ish
1748            }
1749            &Inst::Csdb {} => {
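                    // csdb (Consumption of Speculative Data Barrier), hint-space encoding.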
1750                sink.put4(0xd503229f);
1751            }
1752            &Inst::FpuMove32 { rd, rn } => {
1753                sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
1754            }
1755            &Inst::FpuMove64 { rd, rn } => {
1756                sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));
1757            }
1758            &Inst::FpuMove128 { rd, rn } => {
1759                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
1760            }
1761            &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
1762                let (imm5, shift, mask) = match size.lane_size() {
1763                    ScalarSize::Size32 => (0b00100, 3, 0b011),
1764                    ScalarSize::Size64 => (0b01000, 4, 0b001),
1765                    _ => unimplemented!(),
1766                };
1767                debug_assert_eq!(idx & mask, idx);
1768                let imm5 = imm5 | ((idx as u32) << shift);
1769                sink.put4(
1770                    0b010_11110000_00000_000001_00000_00000
1771                        | (imm5 << 16)
1772                        | (machreg_to_vec(rn) << 5)
1773                        | machreg_to_vec(rd.to_reg()),
1774                );
1775            }
1776            &Inst::FpuExtend { rd, rn, size } => {
1777                sink.put4(enc_fpurr(
1778                    0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
1779                    rd,
1780                    rn,
1781                ));
1782            }
1783            &Inst::FpuRR {
1784                fpu_op,
1785                size,
1786                rd,
1787                rn,
1788            } => {
1789                let top22 = match fpu_op {
1790                    FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
1791                    FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
1792                    FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
1793                    FPUOp1::Cvt32To64 => {
1794                        debug_assert_eq!(size, ScalarSize::Size32);
1795                        0b000_11110_00_1_000101_10000
1796                    }
1797                    FPUOp1::Cvt64To32 => {
1798                        debug_assert_eq!(size, ScalarSize::Size64);
1799                        0b000_11110_01_1_000100_10000
1800                    }
1801                };
1802                let top22 = top22 | size.ftype() << 12;
1803                sink.put4(enc_fpurr(top22, rd, rn));
1804            }
1805            &Inst::FpuRRR {
1806                fpu_op,
1807                size,
1808                rd,
1809                rn,
1810                rm,
1811            } => {
1812                let top22 = match fpu_op {
1813                    FPUOp2::Add => 0b000_11110_00_1_00000_001010,
1814                    FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
1815                    FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
1816                    FPUOp2::Div => 0b000_11110_00_1_00000_000110,
1817                    FPUOp2::Max => 0b000_11110_00_1_00000_010010,
1818                    FPUOp2::Min => 0b000_11110_00_1_00000_010110,
1819                };
1820                let top22 = top22 | size.ftype() << 12;
1821                sink.put4(enc_fpurrr(top22, rd, rn, rm));
1822            }
1823            &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
1824                FPUOpRI::UShr32(imm) => {
1825                    debug_assert_eq!(32, imm.lane_size_in_bits);
1826                    sink.put4(
1827                        0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
1828                            | imm.enc() << 16
1829                            | machreg_to_vec(rn) << 5
1830                            | machreg_to_vec(rd.to_reg()),
1831                    )
1832                }
1833                FPUOpRI::UShr64(imm) => {
1834                    debug_assert_eq!(64, imm.lane_size_in_bits);
1835                    sink.put4(
1836                        0b01_1_111110_0000000_00_0_0_0_1_00000_00000
1837                            | imm.enc() << 16
1838                            | machreg_to_vec(rn) << 5
1839                            | machreg_to_vec(rd.to_reg()),
1840                    )
1841                }
1842            },
1843            &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {
1844                debug_assert_eq!(rd.to_reg(), ri);
1845                match fpu_op {
1846                    FPUOpRIMod::Sli64(imm) => {
1847                        debug_assert_eq!(64, imm.lane_size_in_bits);
1848                        sink.put4(
1849                            0b01_1_111110_0000000_010101_00000_00000
1850                                | imm.enc() << 16
1851                                | machreg_to_vec(rn) << 5
1852                                | machreg_to_vec(rd.to_reg()),
1853                        )
1854                    }
1855                    FPUOpRIMod::Sli32(imm) => {
1856                        debug_assert_eq!(32, imm.lane_size_in_bits);
1857                        sink.put4(
1858                            0b0_0_1_011110_0000000_010101_00000_00000
1859                                | imm.enc() << 16
1860                                | machreg_to_vec(rn) << 5
1861                                | machreg_to_vec(rd.to_reg()),
1862                        )
1863                    }
1864                }
1865            }
1866            &Inst::FpuRRRR {
1867                fpu_op,
1868                size,
1869                rd,
1870                rn,
1871                rm,
1872                ra,
1873            } => {
1874                let top17 = match fpu_op {
1875                    FPUOp3::MAdd => 0b000_11111_00_0_00000_0,
1876                    FPUOp3::MSub => 0b000_11111_00_0_00000_1,
1877                };
1878                let top17 = top17 | size.ftype() << 7;
1879                sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
1880            }
1881            &Inst::VecMisc { op, rd, rn, size } => {
1882                let (q, enc_size) = size.enc_size();
1883                let (u, bits_12_16, size) = match op {
1884                    VecMisc2::Not => (0b1, 0b00101, 0b00),
1885                    VecMisc2::Neg => (0b1, 0b01011, enc_size),
1886                    VecMisc2::Abs => (0b0, 0b01011, enc_size),
1887                    VecMisc2::Fabs => {
1888                        debug_assert!(
1889                            size == VectorSize::Size32x2
1890                                || size == VectorSize::Size32x4
1891                                || size == VectorSize::Size64x2
1892                        );
1893                        (0b0, 0b01111, enc_size)
1894                    }
1895                    VecMisc2::Fneg => {
1896                        debug_assert!(
1897                            size == VectorSize::Size32x2
1898                                || size == VectorSize::Size32x4
1899                                || size == VectorSize::Size64x2
1900                        );
1901                        (0b1, 0b01111, enc_size)
1902                    }
1903                    VecMisc2::Fsqrt => {
1904                        debug_assert!(
1905                            size == VectorSize::Size32x2
1906                                || size == VectorSize::Size32x4
1907                                || size == VectorSize::Size64x2
1908                        );
1909                        (0b1, 0b11111, enc_size)
1910                    }
1911                    VecMisc2::Rev16 => {
1912                        debug_assert_eq!(size, VectorSize::Size8x16);
1913                        (0b0, 0b00001, enc_size)
1914                    }
1915                    VecMisc2::Rev32 => {
1916                        debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
1917                        (0b1, 0b00000, enc_size)
1918                    }
1919                    VecMisc2::Rev64 => {
1920                        debug_assert!(
1921                            size == VectorSize::Size8x16
1922                                || size == VectorSize::Size16x8
1923                                || size == VectorSize::Size32x4
1924                        );
1925                        (0b0, 0b00000, enc_size)
1926                    }
1927                    VecMisc2::Fcvtzs => {
1928                        debug_assert!(
1929                            size == VectorSize::Size32x2
1930                                || size == VectorSize::Size32x4
1931                                || size == VectorSize::Size64x2
1932                        );
1933                        (0b0, 0b11011, enc_size)
1934                    }
1935                    VecMisc2::Fcvtzu => {
1936                        debug_assert!(
1937                            size == VectorSize::Size32x2
1938                                || size == VectorSize::Size32x4
1939                                || size == VectorSize::Size64x2
1940                        );
1941                        (0b1, 0b11011, enc_size)
1942                    }
1943                    VecMisc2::Scvtf => {
1944                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1945                        (0b0, 0b11101, enc_size & 0b1)
1946                    }
1947                    VecMisc2::Ucvtf => {
1948                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1949                        (0b1, 0b11101, enc_size & 0b1)
1950                    }
1951                    VecMisc2::Frintn => {
1952                        debug_assert!(
1953                            size == VectorSize::Size32x2
1954                                || size == VectorSize::Size32x4
1955                                || size == VectorSize::Size64x2
1956                        );
1957                        (0b0, 0b11000, enc_size & 0b01)
1958                    }
1959                    VecMisc2::Frintz => {
1960                        debug_assert!(
1961                            size == VectorSize::Size32x2
1962                                || size == VectorSize::Size32x4
1963                                || size == VectorSize::Size64x2
1964                        );
1965                        (0b0, 0b11001, enc_size)
1966                    }
1967                    VecMisc2::Frintm => {
1968                        debug_assert!(
1969                            size == VectorSize::Size32x2
1970                                || size == VectorSize::Size32x4
1971                                || size == VectorSize::Size64x2
1972                        );
1973                        (0b0, 0b11001, enc_size & 0b01)
1974                    }
1975                    VecMisc2::Frintp => {
1976                        debug_assert!(
1977                            size == VectorSize::Size32x2
1978                                || size == VectorSize::Size32x4
1979                                || size == VectorSize::Size64x2
1980                        );
1981                        (0b0, 0b11000, enc_size)
1982                    }
1983                    VecMisc2::Cnt => {
1984                        debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
1985                        (0b0, 0b00101, enc_size)
1986                    }
1987                    VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
1988                    VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
1989                    VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
1990                    VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
1991                    VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
1992                    VecMisc2::Fcmeq0 => {
1993                        debug_assert!(
1994                            size == VectorSize::Size32x2
1995                                || size == VectorSize::Size32x4
1996                                || size == VectorSize::Size64x2
1997                        );
1998                        (0b0, 0b01101, enc_size)
1999                    }
2000                    VecMisc2::Fcmge0 => {
2001                        debug_assert!(
2002                            size == VectorSize::Size32x2
2003                                || size == VectorSize::Size32x4
2004                                || size == VectorSize::Size64x2
2005                        );
2006                        (0b1, 0b01100, enc_size)
2007                    }
2008                    VecMisc2::Fcmgt0 => {
2009                        debug_assert!(
2010                            size == VectorSize::Size32x2
2011                                || size == VectorSize::Size32x4
2012                                || size == VectorSize::Size64x2
2013                        );
2014                        (0b0, 0b01100, enc_size)
2015                    }
2016                    VecMisc2::Fcmle0 => {
2017                        debug_assert!(
2018                            size == VectorSize::Size32x2
2019                                || size == VectorSize::Size32x4
2020                                || size == VectorSize::Size64x2
2021                        );
2022                        (0b1, 0b01101, enc_size)
2023                    }
2024                    VecMisc2::Fcmlt0 => {
2025                        debug_assert!(
2026                            size == VectorSize::Size32x2
2027                                || size == VectorSize::Size32x4
2028                                || size == VectorSize::Size64x2
2029                        );
2030                        (0b0, 0b01110, enc_size)
2031                    }
2032                };
2033                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
2034            }
2035            &Inst::VecLanes { op, rd, rn, size } => {
2036                let (q, size) = match size {
2037                    VectorSize::Size8x8 => (0b0, 0b00),
2038                    VectorSize::Size8x16 => (0b1, 0b00),
2039                    VectorSize::Size16x4 => (0b0, 0b01),
2040                    VectorSize::Size16x8 => (0b1, 0b01),
2041                    VectorSize::Size32x4 => (0b1, 0b10),
2042                    _ => unreachable!(),
2043                };
2044                let (u, opcode) = match op {
2045                    VecLanesOp::Uminv => (0b1, 0b11010),
2046                    VecLanesOp::Addv => (0b0, 0b11011),
2047                };
2048                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
2049            }
2050            &Inst::VecShiftImm {
2051                op,
2052                rd,
2053                rn,
2054                size,
2055                imm,
2056            } => {
2057                let (is_shr, mut template) = match op {
2058                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
2059                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
2060                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
2061                };
2062                if size.is_128bits() {
2063                    template |= 0b1 << 30;
2064                }
2065                let imm = imm as u32;
2066                // Deal with the somewhat strange encoding scheme for, and limits on,
2067                // the shift amount.
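                    // For right shifts, immh:immb encodes (2 * lane-size - imm); for left
                    // shifts, it encodes (lane-size + imm).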
2068                let immh_immb = match (size.lane_size(), is_shr) {
2069                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2070                        0b_1000_000_u32 | (64 - imm)
2071                    }
2072                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2073                        0b_0100_000_u32 | (32 - imm)
2074                    }
2075                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2076                        0b_0010_000_u32 | (16 - imm)
2077                    }
2078                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2079                        0b_0001_000_u32 | (8 - imm)
2080                    }
2081                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2082                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2083                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2084                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2085                    _ => panic!(
2086                        "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
2087                        op, size, imm
2088                    ),
2089                };
2090                let rn_enc = machreg_to_vec(rn);
2091                let rd_enc = machreg_to_vec(rd.to_reg());
2092                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2093            }
2094            &Inst::VecShiftImmMod {
2095                op,
2096                rd,
2097                ri,
2098                rn,
2099                size,
2100                imm,
2101            } => {
2102                debug_assert_eq!(rd.to_reg(), ri);
2103                let (is_shr, mut template) = match op {
2104                    VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),
2105                };
2106                if size.is_128bits() {
2107                    template |= 0b1 << 30;
2108                }
2109                let imm = imm as u32;
2110                // Deal with the somewhat strange encoding scheme for, and limits on,
2111                // the shift amount.
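                    // For right shifts, immh:immb encodes (2 * lane-size - imm); for left
                    // shifts, it encodes (lane-size + imm).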
2112                let immh_immb = match (size.lane_size(), is_shr) {
2113                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
2114                        0b_1000_000_u32 | (64 - imm)
2115                    }
2116                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
2117                        0b_0100_000_u32 | (32 - imm)
2118                    }
2119                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
2120                        0b_0010_000_u32 | (16 - imm)
2121                    }
2122                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
2123                        0b_0001_000_u32 | (8 - imm)
2124                    }
2125                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
2126                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
2127                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
2128                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
2129                    _ => panic!(
2130                        "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}",
2131                        op, size, imm
2132                    ),
2133                };
2134                let rn_enc = machreg_to_vec(rn);
2135                let rd_enc = machreg_to_vec(rd.to_reg());
2136                sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
2137            }
2138            &Inst::VecExtract { rd, rn, rm, imm4 } => {
2139                if imm4 < 16 {
2140                    let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
2141                    let rm_enc = machreg_to_vec(rm);
2142                    let rn_enc = machreg_to_vec(rn);
2143                    let rd_enc = machreg_to_vec(rd.to_reg());
2144                    sink.put4(
2145                        template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
2146                    );
2147                } else {
2148                    panic!(
2149                        "aarch64: Inst::VecExtract: emit: invalid extract index {}",
2150                        imm4
2151                    );
2152                }
2153            }
2154            &Inst::VecTbl { rd, rn, rm } => {
2155                sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));
2156            }
2157            &Inst::VecTblExt { rd, ri, rn, rm } => {
2158                debug_assert_eq!(rd.to_reg(), ri);
2159                sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));
2160            }
2161            &Inst::VecTbl2 { rd, rn, rn2, rm } => {
2162                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2163                sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));
2164            }
2165            &Inst::VecTbl2Ext {
2166                rd,
2167                ri,
2168                rn,
2169                rn2,
2170                rm,
2171            } => {
2172                debug_assert_eq!(rd.to_reg(), ri);
2173                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
2174                sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));
2175            }
2176            &Inst::FpuCmp { size, rn, rm } => {
2177                sink.put4(enc_fcmp(size, rn, rm));
2178            }
2179            &Inst::FpuToInt { op, rd, rn } => {
2180                let top16 = match op {
2181                    // FCVTZS (32/32-bit)
2182                    FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
2183                    // FCVTZU (32/32-bit)
2184                    FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
2185                    // FCVTZS (32/64-bit)
2186                    FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
2187                    // FCVTZU (32/64-bit)
2188                    FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
2189                    // FCVTZS (64/32-bit)
2190                    FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
2191                    // FCVTZU (64/32-bit)
2192                    FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
2193                    // FCVTZS (64/64-bit)
2194                    FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
2195                    // FCVTZU (64/64-bit)
2196                    FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
2197                };
2198                sink.put4(enc_fputoint(top16, rd, rn));
2199            }
2200            &Inst::IntToFpu { op, rd, rn } => {
2201                let top16 = match op {
2202                    // SCVTF (32/32-bit)
2203                    IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
2204                    // UCVTF (32/32-bit)
2205                    IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
2206                    // SCVTF (64/32-bit)
2207                    IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
2208                    // UCVTF (64/32-bit)
2209                    IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
2210                    // SCVTF (32/64-bit)
2211                    IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
2212                    // UCVTF (32/64-bit)
2213                    IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
2214                    // SCVTF (64/64-bit)
2215                    IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
2216                    // UCVTF (64/64-bit)
2217                    IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
2218                };
2219                sink.put4(enc_inttofpu(top16, rd, rn));
2220            }
2221            &Inst::FpuCSel32 { rd, rn, rm, cond } => {
2222                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
2223            }
2224            &Inst::FpuCSel64 { rd, rn, rm, cond } => {
2225                sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
2226            }
2227            &Inst::FpuRound { op, rd, rn } => {
2228                let top22 = match op {
2229                    FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
2230                    FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
2231                    FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
2232                    FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
2233                    FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
2234                    FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
2235                    FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
2236                    FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
2237                };
2238                sink.put4(enc_fround(top22, rd, rn));
2239            }
2240            &Inst::MovToFpu { rd, rn, size } => {
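                    // fmov vd, rn (FMOV from a general register).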
2241                let template = match size {
2242                    ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
2243                    ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
2244                    _ => unreachable!(),
2245                };
2246                sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
2247            }
2248            &Inst::FpuMoveFPImm { rd, imm, size } => {
2249                let size_code = match size {
2250                    ScalarSize::Size32 => 0b00,
2251                    ScalarSize::Size64 => 0b01,
2252                    _ => unimplemented!(),
2253                };
2254                sink.put4(
2255                    0b000_11110_00_1_00_000_000100_00000_00000
2256                        | size_code << 22
2257                        | ((imm.enc_bits() as u32) << 13)
2258                        | machreg_to_vec(rd.to_reg()),
2259                );
2260            }
2261            &Inst::MovToVec {
2262                rd,
2263                ri,
2264                rn,
2265                idx,
2266                size,
2267            } => {
2268                debug_assert_eq!(rd.to_reg(), ri);
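                    // ins vd.ts[idx], rn (INS from a general register).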
2269                let (imm5, shift) = match size.lane_size() {
2270                    ScalarSize::Size8 => (0b00001, 1),
2271                    ScalarSize::Size16 => (0b00010, 2),
2272                    ScalarSize::Size32 => (0b00100, 3),
2273                    ScalarSize::Size64 => (0b01000, 4),
2274                    _ => unreachable!(),
2275                };
2276                debug_assert_eq!(idx & (0b11111 >> shift), idx);
2277                let imm5 = imm5 | ((idx as u32) << shift);
2278                sink.put4(
2279                    0b010_01110000_00000_0_0011_1_00000_00000
2280                        | (imm5 << 16)
2281                        | (machreg_to_gpr(rn) << 5)
2282                        | machreg_to_vec(rd.to_reg()),
2283                );
2284            }
2285            &Inst::MovFromVec { rd, rn, idx, size } => {
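                    // umov rd, vn.ts[idx]; the 64-bit form is the mov alias.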
2286                let (q, imm5, shift, mask) = match size {
2287                    ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),
2288                    ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),
2289                    ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),
2290                    ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),
2291                    _ => panic!("Unexpected scalar FP operand size: {:?}", size),
2292                };
2293                debug_assert_eq!(idx & mask, idx);
2294                let imm5 = imm5 | ((idx as u32) << shift);
2295                sink.put4(
2296                    0b000_01110000_00000_0_0111_1_00000_00000
2297                        | (q << 30)
2298                        | (imm5 << 16)
2299                        | (machreg_to_vec(rn) << 5)
2300                        | machreg_to_gpr(rd.to_reg()),
2301                );
2302            }
2303            &Inst::MovFromVecSigned {
2304                rd,
2305                rn,
2306                idx,
2307                size,
2308                scalar_size,
2309            } => {
2310                let (imm5, shift, half) = match size {
2311                    VectorSize::Size8x8 => (0b00001, 1, true),
2312                    VectorSize::Size8x16 => (0b00001, 1, false),
2313                    VectorSize::Size16x4 => (0b00010, 2, true),
2314                    VectorSize::Size16x8 => (0b00010, 2, false),
2315                    VectorSize::Size32x2 => {
2316                        debug_assert_ne!(scalar_size, OperandSize::Size32);
2317                        (0b00100, 3, true)
2318                    }
2319                    VectorSize::Size32x4 => {
2320                        debug_assert_ne!(scalar_size, OperandSize::Size32);
2321                        (0b00100, 3, false)
2322                    }
2323                    _ => panic!("Unexpected vector operand size"),
2324                };
2325                debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
2326                let imm5 = imm5 | ((idx as u32) << shift);
2327                sink.put4(
2328                    0b000_01110000_00000_0_0101_1_00000_00000
2329                        | (scalar_size.is64() as u32) << 30
2330                        | (imm5 << 16)
2331                        | (machreg_to_vec(rn) << 5)
2332                        | machreg_to_gpr(rd.to_reg()),
2333                );
2334            }
2335            &Inst::VecDup { rd, rn, size } => {
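                    // dup vd.t, rn (DUP from a general register).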
2336                let q = size.is_128bits() as u32;
2337                let imm5 = match size.lane_size() {
2338                    ScalarSize::Size8 => 0b00001,
2339                    ScalarSize::Size16 => 0b00010,
2340                    ScalarSize::Size32 => 0b00100,
2341                    ScalarSize::Size64 => 0b01000,
2342                    _ => unreachable!(),
2343                };
2344                sink.put4(
2345                    0b0_0_0_01110000_00000_000011_00000_00000
2346                        | (q << 30)
2347                        | (imm5 << 16)
2348                        | (machreg_to_gpr(rn) << 5)
2349                        | machreg_to_vec(rd.to_reg()),
2350                );
2351            }
2352            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
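                    // dup vd.t, vn.ts[lane] (DUP from a vector element).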
2353                let q = size.is_128bits() as u32;
2354                let imm5 = match size.lane_size() {
2355                    ScalarSize::Size8 => {
2356                        assert!(lane < 16);
2357                        0b00001 | (u32::from(lane) << 1)
2358                    }
2359                    ScalarSize::Size16 => {
2360                        assert!(lane < 8);
2361                        0b00010 | (u32::from(lane) << 2)
2362                    }
2363                    ScalarSize::Size32 => {
2364                        assert!(lane < 4);
2365                        0b00100 | (u32::from(lane) << 3)
2366                    }
2367                    ScalarSize::Size64 => {
2368                        assert!(lane < 2);
2369                        0b01000 | (u32::from(lane) << 4)
2370                    }
2371                    _ => unimplemented!(),
2372                };
2373                sink.put4(
2374                    0b000_01110000_00000_000001_00000_00000
2375                        | (q << 30)
2376                        | (imm5 << 16)
2377                        | (machreg_to_vec(rn) << 5)
2378                        | machreg_to_vec(rd.to_reg()),
2379                );
2380            }
2381            &Inst::VecDupFPImm { rd, imm, size } => {
2382                let imm = imm.enc_bits();
2383                let op = match size.lane_size() {
2384                    ScalarSize::Size32 => 0,
2385                    ScalarSize::Size64 => 1,
2386                    _ => unimplemented!(),
2387                };
2388                let q_op = op | ((size.is_128bits() as u32) << 1);
2389
2390                sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));
2391            }
2392            &Inst::VecDupImm {
2393                rd,
2394                imm,
2395                invert,
2396                size,
2397            } => {
2398                let (imm, shift, shift_ones) = imm.value();
2399                let (op, cmode) = match size.lane_size() {
2400                    ScalarSize::Size8 => {
2401                        assert!(!invert);
2402                        assert_eq!(shift, 0);
2403
2404                        (0, 0b1110)
2405                    }
2406                    ScalarSize::Size16 => {
2407                        let s = shift & 8;
2408
2409                        assert!(!shift_ones);
2410                        assert_eq!(s, shift);
2411
2412                        (invert as u32, 0b1000 | (s >> 2))
2413                    }
2414                    ScalarSize::Size32 => {
2415                        if shift_ones {
2416                            assert!(shift == 8 || shift == 16);
2417
2418                            (invert as u32, 0b1100 | (shift >> 4))
2419                        } else {
2420                            let s = shift & 24;
2421
2422                            assert_eq!(s, shift);
2423
2424                            (invert as u32, 0b0000 | (s >> 2))
2425                        }
2426                    }
2427                    ScalarSize::Size64 => {
2428                        assert!(!invert);
2429                        assert_eq!(shift, 0);
2430
2431                        (1, 0b1110)
2432                    }
2433                    _ => unreachable!(),
2434                };
2435                let q_op = op | ((size.is_128bits() as u32) << 1);
2436
2437                sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
2438            }
2439            &Inst::VecExtend {
2440                t,
2441                rd,
2442                rn,
2443                high_half,
2444                lane_size,
2445            } => {
2446                let immh = match lane_size {
2447                    ScalarSize::Size16 => 0b001,
2448                    ScalarSize::Size32 => 0b010,
2449                    ScalarSize::Size64 => 0b100,
2450                    _ => panic!("Unexpected VecExtend to lane size of {:?}", lane_size),
2451                };
2452                let u = match t {
2453                    VecExtendOp::Sxtl => 0b0,
2454                    VecExtendOp::Uxtl => 0b1,
2455                };
2456                sink.put4(
2457                    0b000_011110_0000_000_101001_00000_00000
2458                        | ((high_half as u32) << 30)
2459                        | (u << 29)
2460                        | (immh << 19)
2461                        | (machreg_to_vec(rn) << 5)
2462                        | machreg_to_vec(rd.to_reg()),
2463                );
2464            }
2465            &Inst::VecRRLong {
2466                op,
2467                rd,
2468                rn,
2469                high_half,
2470            } => {
2471                let (u, size, bits_12_16) = match op {
2472                    VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),
2473                    VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),
2474                    VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),
2475                    VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),
2476                    VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),
2477                };
2478
2479                sink.put4(enc_vec_rr_misc(
2480                    ((high_half as u32) << 1) | u,
2481                    size,
2482                    bits_12_16,
2483                    rd,
2484                    rn,
2485                ));
2486            }
2487            &Inst::VecRRNarrowLow {
2488                op,
2489                rd,
2490                rn,
2491                lane_size,
2492            }
2493            | &Inst::VecRRNarrowHigh {
2494                op,
2495                rd,
2496                rn,
2497                lane_size,
2498                ..
2499            } => {
2500                let high_half = match self {
2501                    &Inst::VecRRNarrowLow { .. } => false,
2502                    &Inst::VecRRNarrowHigh { .. } => true,
2503                    _ => unreachable!(),
2504                };
2505
2506                let size = match lane_size {
2507                    ScalarSize::Size8 => 0b00,
2508                    ScalarSize::Size16 => 0b01,
2509                    ScalarSize::Size32 => 0b10,
2510                    _ => panic!("unsupported size: {:?}", lane_size),
2511                };
2512
2513                // Floats use a single bit to encode the destination precision, half or single.
2514                let size = match op {
2515                    VecRRNarrowOp::Fcvtn => size >> 1,
2516                    _ => size,
2517                };
2518
2519                let (u, bits_12_16) = match op {
2520                    VecRRNarrowOp::Xtn => (0b0, 0b10010),
2521                    VecRRNarrowOp::Sqxtn => (0b0, 0b10100),
2522                    VecRRNarrowOp::Sqxtun => (0b1, 0b10010),
2523                    VecRRNarrowOp::Uqxtn => (0b1, 0b10100),
2524                    VecRRNarrowOp::Fcvtn => (0b0, 0b10110),
2525                };
2526
2527                sink.put4(enc_vec_rr_misc(
2528                    ((high_half as u32) << 1) | u,
2529                    size,
2530                    bits_12_16,
2531                    rd,
2532                    rn,
2533                ));
2534            }
2535            &Inst::VecMovElement {
2536                rd,
2537                ri,
2538                rn,
2539                dest_idx,
2540                src_idx,
2541                size,
2542            } => {
2543                debug_assert_eq!(rd.to_reg(), ri);
2544                let (imm5, shift) = match size.lane_size() {
2545                    ScalarSize::Size8 => (0b00001, 1),
2546                    ScalarSize::Size16 => (0b00010, 2),
2547                    ScalarSize::Size32 => (0b00100, 3),
2548                    ScalarSize::Size64 => (0b01000, 4),
2549                    _ => unreachable!(),
2550                };
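                    // INS (element): imm5 encodes the lane size and destination index;
                    // imm4 encodes the source index.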
2551                let mask = 0b11111 >> shift;
2552                debug_assert_eq!(dest_idx & mask, dest_idx);
2553                debug_assert_eq!(src_idx & mask, src_idx);
2554                let imm4 = (src_idx as u32) << (shift - 1);
2555                let imm5 = imm5 | ((dest_idx as u32) << shift);
2556                sink.put4(
2557                    0b011_01110000_00000_0_0000_1_00000_00000
2558                        | (imm5 << 16)
2559                        | (imm4 << 11)
2560                        | (machreg_to_vec(rn) << 5)
2561                        | machreg_to_vec(rd.to_reg()),
2562                );
2563            }
2564            &Inst::VecRRPair { op, rd, rn } => {
2565                let bits_12_16 = match op {
2566                    VecPairOp::Addp => 0b11011,
2567                };
2568
2569                sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
2570            }
2571            &Inst::VecRRRLong {
2572                rd,
2573                rn,
2574                rm,
2575                alu_op,
2576                high_half,
2577            } => {
2578                let (u, size, bit14) = match alu_op {
2579                    VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),
2580                    VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),
2581                    VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),
2582                    VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1),
2583                    VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),
2584                    VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),
2585                };
2586                sink.put4(enc_vec_rrr_long(
2587                    high_half as u32,
2588                    u,
2589                    size,
2590                    bit14,
2591                    rm,
2592                    rn,
2593                    rd,
2594                ));
2595            }
2596            &Inst::VecRRRLongMod {
2597                rd,
2598                ri,
2599                rn,
2600                rm,
2601                alu_op,
2602                high_half,
2603            } => {
2604                debug_assert_eq!(rd.to_reg(), ri);
2605                let (u, size, bit14) = match alu_op {
2606                    VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),
2607                    VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),
2608                    VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),
2609                };
2610                sink.put4(enc_vec_rrr_long(
2611                    high_half as u32,
2612                    u,
2613                    size,
2614                    bit14,
2615                    rm,
2616                    rn,
2617                    rd,
2618                ));
2619            }
2620            &Inst::VecRRPairLong { op, rd, rn } => {
2621                let (u, size) = match op {
2622                    VecRRPairLongOp::Saddlp8 => (0b0, 0b0),
2623                    VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),
2624                    VecRRPairLongOp::Saddlp16 => (0b0, 0b1),
2625                    VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),
2626                };

                sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));
            }
            &Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op,
                size,
            } => {
                let (q, enc_size) = size.enc_size();
                let is_float = match alu_op {
                    VecALUOp::Fcmeq
                    | VecALUOp::Fcmgt
                    | VecALUOp::Fcmge
                    | VecALUOp::Fadd
                    | VecALUOp::Fsub
                    | VecALUOp::Fdiv
                    | VecALUOp::Fmax
                    | VecALUOp::Fmin
                    | VecALUOp::Fmul => true,
                    _ => false,
                };

                let (top11, bit15_10) = match alu_op {
                    VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
                    VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
                    VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
                    VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
                    VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
                    VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
                    VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
                    VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
                    // The following logical instructions operate on bytes, so are not encoded differently
                    // for the different vector types.
                    VecALUOp::And => (0b000_01110_00_1, 0b000111),
                    VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
                    VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                    VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                    VecALUOp::Umaxp => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b101001)
                    }
                    VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                    VecALUOp::Mul => {
                        debug_assert_ne!(size, VectorSize::Size64x2);
                        (0b000_01110_00_1 | enc_size << 1, 0b100111)
                    }
                    VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
                    VecALUOp::Umin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Smin => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011011)
                    }
                    VecALUOp::Umax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Smax => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b000_01110_00_1 | enc_size << 1, 0b011001)
                    }
                    VecALUOp::Urhadd => {
                        debug_assert_ne!(size, VectorSize::Size64x2);

                        (0b001_01110_00_1 | enc_size << 1, 0b000101)
                    }
                    VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
                    VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
                    VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
                    VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
                    VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
                    VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
                    VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
                    VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
                    VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
                    VecALUOp::Sqrdmulh => {
                        debug_assert!(
                            size.lane_size() == ScalarSize::Size16
                                || size.lane_size() == ScalarSize::Size32
                        );

                        (0b001_01110_00_1 | enc_size << 1, 0b101101)
                    }
                    VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
                    VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
                    VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
                    VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
                };
                let top11 = if is_float {
                    top11 | size.enc_float_size() << 1
                } else {
                    top11
                };
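                // The fields assemble per the AdvSIMD "three same" layout:
                // `top11` fills bits 31..21 (so `q << 9` lands in bit 30, the
                // Q bit), `bit15_10` fills bits 15..10, and Rm/Rn/Rd take
                // their usual register fields.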
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecRRRMod {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let (q, _enc_size) = size.enc_size();

                let (top11, bit15_10) = match alu_op {
                    VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),
                    VecALUModOp::Fmla => {
                        (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                    VecALUModOp::Fmls => {
                        (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)
                    }
                };
                sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
            }
            &Inst::VecFmlaElem {
                rd,
                ri,
                rn,
                rm,
                alu_op,
                size,
                idx,
            } => {
                debug_assert_eq!(rd.to_reg(), ri);
                let idx = u32::from(idx);

                let (q, _size) = size.enc_size();
                let o2 = match alu_op {
                    VecALUModOp::Fmla => 0b0,
                    VecALUModOp::Fmls => 0b1,
                    _ => unreachable!(),
                };

                let (h, l) = match size {
                    VectorSize::Size32x4 => {
                        assert!(idx < 4);
                        (idx >> 1, idx & 1)
                    }
                    VectorSize::Size64x2 => {
                        assert!(idx < 2);
                        (idx, 0)
                    }
                    _ => unreachable!(),
                };

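                // In the by-element form the lane index is split across the
                // H (bit 11) and L (bit 21) fields: H:L together for 32-bit
                // lanes, H alone (with L zero) for 64-bit lanes.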
                let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;
                let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);
                sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
            }
            &Inst::VecLoadReplicate {
                rd,
                rn,
                size,
                flags,
            } => {
                let (q, size) = size.enc_size();

                if let Some(trap_code) = flags.trap_code() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(trap_code);
                }

                sink.put4(enc_ldst_vec(q, size, rn, rd));
            }
            &Inst::VecCSel { rd, rn, rm, cond } => {
                /* Emit this:
                      b.cond  else
                      mov     rd, rm
                      b       out
                     else:
                      mov     rd, rn
                     out:

                   Note that we could do better in the cases where rd == rn or rd == rm.
                */
                let else_label = sink.get_label();
                let out_label = sink.get_label();

                // b.cond else
                let br_else_offset = sink.cur_offset();
                sink.put4(enc_conditional_br(
                    BranchTarget::Label(else_label),
                    CondBrKind::Cond(cond),
                ));
                sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);

                // mov rd, rm
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));

                // b out
                let b_out_offset = sink.cur_offset();
                sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
                sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
                sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));

                // else:
                sink.bind_label(else_label, &mut state.ctrl_plane);

                // mov rd, rn
                sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));

                // out:
                sink.bind_label(out_label, &mut state.ctrl_plane);
            }
            &Inst::MovToNZCV { rn } => {
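                // MSR NZCV, Xn: 0xd51b4200 is `msr nzcv, x0`; the source
                // register number is ORed into the Rt field.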
                sink.put4(0xd51b4200 | machreg_to_gpr(rn));
            }
            &Inst::MovFromNZCV { rd } => {
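                // MRS Xn, NZCV: 0xd53b4200 is `mrs x0, nzcv`.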
                sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 1,
                to_bits,
            } => {
                assert!(to_bits <= 64);
                // Reduce zero-extend-from-1-bit to:
                // - and rd, rn, #1
                // Note: This is special cased as UBFX may take more cycles
                // than AND on smaller cores.
                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
                Inst::AluRRImmLogic {
                    alu_op: ALUOp::And,
                    size: OperandSize::Size32,
                    rd,
                    rn,
                    imml,
                }
                .emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed: false,
                from_bits: 32,
                to_bits: 64,
            } => {
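                // A 32-bit register move zero-extends: writing the W view of
                // a register clears the upper 32 bits, so a plain `mov`
                // implements the 32-to-64-bit zero-extension.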
                let mov = Inst::Mov {
                    size: OperandSize::Size32,
                    rd,
                    rm: rn,
                };
                mov.emit(sink, emit_info, state);
            }
            &Inst::Extend {
                rd,
                rn,
                signed,
                from_bits,
                to_bits,
            } => {
                let (opc, size) = if signed {
                    (0b00, OperandSize::from_bits(to_bits))
                } else {
                    (0b10, OperandSize::Size32)
                };
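                // A bitfield move with immr == 0 and imms == from_bits - 1
                // extracts and extends the low `from_bits` bits: opc 0b00 is
                // SBFM (sign-extend); opc 0b10 is UBFM, emitted at 32 bits
                // since the W-register write zero-extends to 64 bits.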
                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
            }
            &Inst::Jump { ref dest } => {
                let off = sink.cur_offset();
                // If the jump is to a label, register the use so that a fixup can occur later.
                if let Some(l) = dest.as_label() {
                    sink.use_label_at_offset(off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(off, off + 4, l);
                }
                // Emit the jump itself.
                sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
            }
            &Inst::Args { .. } | &Inst::Rets { .. } => {
                // Nothing: this is a pseudoinstruction that serves
                // only to constrain registers at a certain point.
            }
            &Inst::Ret {} => {
                sink.put4(0xd65f03c0);
            }
            &Inst::AuthenticatedRet { key, is_hint } => {
                let (op2, is_hint) = match key {
                    APIKey::AZ => (0b100, true),
                    APIKey::ASP => (0b101, is_hint),
                    APIKey::BZ => (0b110, true),
                    APIKey::BSP => (0b111, is_hint),
                };

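                // Only the SP-modifier keys have combined RETAA/RETAB
                // encodings; the zero-modifier keys must authenticate via
                // the AUTI* hint and then return with a plain RET.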
                if is_hint {
                    sink.put4(key.enc_auti_hint());
                    Inst::Ret {}.emit(sink, emit_info, state);
                } else {
                    sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}
                }
            }
            &Inst::Call { ref info } => {
                let (stack_map, user_stack_map) = state.take_stack_map();
                if let Some(s) = stack_map {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
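                // BL with a zero offset: opcode 0b100101 is BL (0b000101
                // would be B); the Arm64Call relocation fills in the imm26.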
                sink.put4(enc_jump26(0b100101, 0));
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }
                sink.add_call_site();

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }
            &Inst::CallInd { ref info } => {
                let (stack_map, user_stack_map) = state.take_stack_map();
                if let Some(s) = stack_map {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
                }
                let rn = info.rn;
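                // BLR Xn: the fixed bits are 0xd63f0000, with Rn in bits 9..5.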
                sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
                if let Some(s) = user_stack_map {
                    let offset = sink.cur_offset();
                    sink.push_user_stack_map(state, offset, s);
                }
                sink.add_call_site();

                if info.callee_pop_size > 0 {
                    let callee_pop_size =
                        i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");
                    for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
                        inst.emit(sink, emit_info, state);
                    }
                }
            }
            &Inst::ReturnCall {
                ref callee,
                ref info,
            } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                // Note: this is not `Inst::Jump { .. }.emit(..)` because we
                // have different metadata in this case: we don't have a label
                // for the target, but rather a function relocation.
                sink.add_reloc(Reloc::Arm64Call, &**callee, 0);
                sink.put4(enc_jump26(0b000101, 0));
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::ReturnCallInd { callee, ref info } => {
                emit_return_call_common_sequence(sink, emit_info, state, info);

                Inst::IndirectBr {
                    rn: callee,
                    targets: vec![],
                }
                .emit(sink, emit_info, state);
                sink.add_call_site();

                // `emit_return_call_common_sequence` emits an island if
                // necessary, so we can safely disable the worst-case-size check
                // in this case.
                start_off = sink.cur_offset();
            }
            &Inst::CondBr {
                taken,
                not_taken,
                kind,
            } => {
                // Conditional part first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
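                    // Hand the buffer the inverted form of the branch so it
                    // can flip the condition if branch folding later makes
                    // the fallthrough the taken path.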
                    let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_conditional_br(taken, kind));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TestBitAndBranch {
                taken,
                not_taken,
                kind,
                rn,
                bit,
            } => {
                // Emit the conditional branch first.
                let cond_off = sink.cur_offset();
                if let Some(l) = taken.as_label() {
                    sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);
                    let inverted =
                        enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes();
                    sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
                }
                sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));

                // Unconditional part next.
                let uncond_off = sink.cur_offset();
                if let Some(l) = not_taken.as_label() {
                    sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
                    sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
                }
                sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
            }
            &Inst::TrapIf { kind, trap_code } => {
                let label = sink.defer_trap(trap_code);
                // condbr KIND, LABEL
                let off = sink.cur_offset();
                sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));
                sink.use_label_at_offset(off, label, LabelUse::Branch19);
            }
            &Inst::IndirectBr { rn, .. } => {
                sink.put4(enc_br(rn));
            }
            &Inst::Nop0 => {}
            &Inst::Nop4 => {
                sink.put4(0xd503201f);
            }
            &Inst::Brk => {
                sink.put4(0xd4200000);
            }
            &Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                sink.put_data(Inst::TRAP_OPCODE);
            }
            &Inst::Adr { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adr(off, rd));
            }
            &Inst::Adrp { rd, off } => {
                assert!(off > -(1 << 20));
                assert!(off < (1 << 20));
                sink.put4(enc_adrp(off, rd));
            }
            &Inst::Word4 { data } => {
                sink.put4(data);
            }
            &Inst::Word8 { data } => {
                sink.put8(data);
            }
            &Inst::JTSequence {
                ridx,
                rtmp1,
                rtmp2,
                default,
                ref targets,
                ..
            } => {
                // This sequence is *one* instruction in the vcode, and is expanded only here at
                // emission time, because we cannot allow the regalloc to insert spills/reloads in
                // the middle; we depend on hardcoded PC-rel addressing below.

                // Branch to default when condition code from prior comparison indicates.
                let br =
                    enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));

                // No need to inform the sink's branch folding logic about this branch, because it
                // will not be merged with any other branch, flipped, or elided (it is not preceded
                // or succeeded by any other branch). Just emit it with the label use.
                let default_br_offset = sink.cur_offset();
                sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);
                sink.put4(br);

                // Overwrite the index with a zero when the above
                // branch misspeculates (Spectre mitigation). Save the
                // resulting index in rtmp2.
                let inst = Inst::CSel {
                    rd: rtmp2,
                    cond: Cond::Hs,
                    rn: zero_reg(),
                    rm: ridx,
                };
                inst.emit(sink, emit_info, state);
                // Prevent any data value speculation.
                Inst::Csdb.emit(sink, emit_info, state);

                // Load address of jump table
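                // The table begins 16 bytes past the ADR: the ADR itself,
                // the table load, the ADD, and the BR below are four 4-byte
                // instructions.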
                let inst = Inst::Adr { rd: rtmp1, off: 16 };
                inst.emit(sink, emit_info, state);
                // Load value out of jump table
                let inst = Inst::SLoad32 {
                    rd: rtmp2,
                    mem: AMode::reg_plus_reg_scaled_extended(
                        rtmp1.to_reg(),
                        rtmp2.to_reg(),
                        ExtendOp::UXTW,
                    ),
                    flags: MemFlags::trusted(),
                };
                inst.emit(sink, emit_info, state);
                // Add base of jump table to jump-table-sourced block offset
                let inst = Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd: rtmp1,
                    rn: rtmp1.to_reg(),
                    rm: rtmp2.to_reg(),
                };
                inst.emit(sink, emit_info, state);
                // Branch to computed address. (`targets` here is only used for successor queries
                // and is not needed for emission.)
                let inst = Inst::IndirectBr {
                    rn: rtmp1.to_reg(),
                    targets: vec![],
                };
                inst.emit(sink, emit_info, state);
                // Emit jump table (table of 32-bit offsets).
                let jt_off = sink.cur_offset();
                for &target in targets.iter() {
                    let word_off = sink.cur_offset();
                    // off_into_table is an addend here embedded in the label to be later patched
                    // at the end of codegen. The offset is initially relative to this jump table
                    // entry; with the extra addend, it'll be relative to the jump table's start,
                    // after patching.
                    let off_into_table = word_off - jt_off;
                    sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
                    sink.put4(off_into_table);
                }

                // Lowering produces an EmitIsland before using a JTSequence, so we can safely
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
            &Inst::LoadExtName {
                rd,
                ref name,
                offset,
            } => {
                if emit_info.0.is_pic() {
                    // See this Compiler Explorer example for the variations of this with and
                    // without BTI & PAUTH:
                    // https://godbolt.org/z/ncqjbbvvn
                    //
                    // Emit the following code:
                    //   adrp    rd, :got:X
                    //   ldr     rd, [rd, :got_lo12:X]

                    // adrp rd, symbol
                    sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);
                    let inst = Inst::Adrp { rd, off: 0 };
                    inst.emit(sink, emit_info, state);

                    // ldr rd, [rd, :got_lo12:X]
                    sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);
                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::reg(rd.to_reg()),
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                } else {
                    // With absolute offsets we set up a load from a preallocated space, and then jump
                    // over it.
                    //
                    // Emit the following code:
                    //   ldr     rd, #8
                    //   b       #0x10
                    //   <8 byte space>

                    let inst = Inst::ULoad64 {
                        rd,
                        mem: AMode::Label {
                            label: MemLabel::PCRel(8),
                        },
                        flags: MemFlags::trusted(),
                    };
                    inst.emit(sink, emit_info, state);
                    let inst = Inst::Jump {
                        dest: BranchTarget::ResolvedOffset(12),
                    };
                    inst.emit(sink, emit_info, state);
                    sink.add_reloc(Reloc::Abs8, &**name, offset);
                    sink.put8(0);
                }
            }
            &Inst::LoadAddr { rd, ref mem } => {
                let mem = mem.clone();
                let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);
                for inst in mem_insts.into_iter() {
                    inst.emit(sink, emit_info, state);
                }

                let (reg, index_reg, offset) = match mem {
                    AMode::RegExtended { rn, rm, extendop } => {
                        let r = rn;
                        (r, Some((rm, extendop)), 0)
                    }
                    AMode::Unscaled { rn, simm9 } => {
                        let r = rn;
                        (r, None, simm9.value())
                    }
                    AMode::UnsignedOffset { rn, uimm12 } => {
                        let r = rn;
                        (r, None, uimm12.value() as i32)
                    }
                    _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
                };
                let abs_offset = if offset < 0 {
                    -offset as u64
                } else {
                    offset as u64
                };
                let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };

                if let Some((idx, extendop)) = index_reg {
                    let add = Inst::AluRRRExtend {
                        alu_op: ALUOp::Add,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: idx,
                        extendop,
                    };

                    add.emit(sink, emit_info, state);
                } else if offset == 0 {
                    if reg != rd.to_reg() {
                        let mov = Inst::Mov {
                            size: OperandSize::Size64,
                            rd,
                            rm: reg,
                        };

                        mov.emit(sink, emit_info, state);
                    }
                } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
                    let add = Inst::AluRRImm12 {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        imm12,
                    };
                    add.emit(sink, emit_info, state);
                } else {
                    // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
                    // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
                    // that no other instructions will be inserted here (we're emitting directly),
                    // and a live range of `tmp2` should not span this instruction, so this use
                    // should otherwise be correct.
                    debug_assert!(rd.to_reg() != tmp2_reg());
                    debug_assert!(reg != tmp2_reg());
                    let tmp = writable_tmp2_reg();
                    for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
                        insn.emit(sink, emit_info, state);
                    }
                    let add = Inst::AluRRR {
                        alu_op,
                        size: OperandSize::Size64,
                        rd,
                        rn: reg,
                        rm: tmp.to_reg(),
                    };
                    add.emit(sink, emit_info, state);
                }
            }
            &Inst::Paci { key } => {
                let (crm, op2) = match key {
                    APIKey::AZ => (0b0011, 0b000),
                    APIKey::ASP => (0b0011, 0b001),
                    APIKey::BZ => (0b0011, 0b010),
                    APIKey::BSP => (0b0011, 0b011),
                };

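                // The PACI* instructions live in the HINT space, so CRm:op2
                // selects among paciaz/paciasp/pacibz/pacibsp and they
                // execute as NOPs on cores without pointer authentication.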
                sink.put4(0xd503211f | (crm << 8) | (op2 << 5));
            }
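            // XPACLRI: strip the pointer-authentication code from the link
            // register; also a HINT, hence harmless without PAuth.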
            &Inst::Xpaclri => sink.put4(0xd50320ff),
            &Inst::Bti { targets } => {
                let targets = match targets {
                    BranchTargetType::None => 0b00,
                    BranchTargetType::C => 0b01,
                    BranchTargetType::J => 0b10,
                    BranchTargetType::JC => 0b11,
                };

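                // BTI is also a HINT; bits 7:6 select which branch types
                // (`c`, `j`, or `jc`) the landing pad permits.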
                sink.put4(0xd503241f | targets << 6);
            }
            &Inst::EmitIsland { needed_space } => {
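                // Ask for four extra bytes so the island also has room for
                // the jump-around branch emitted just before it.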
                if sink.island_needed(needed_space + 4) {
                    let jump_around_label = sink.get_label();
                    let jmp = Inst::Jump {
                        dest: BranchTarget::Label(jump_around_label),
                    };
                    jmp.emit(sink, emit_info, state);
                    sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                    sink.bind_label(jump_around_label, &mut state.ctrl_plane);
                }
            }

            &Inst::ElfTlsGetAddr {
                ref symbol,
                rd,
                tmp,
            } => {
                assert_eq!(xreg(0), rd.to_reg());

                // See the original proposal for TLSDESC.
                // http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
                //
                // Implement the TLSDESC instruction sequence:
                //   adrp x0, :tlsdesc:tlsvar
                //   ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                //   add  x0, x0, :tlsdesc_lo12:tlsvar
                //   blr  tmp
                //   mrs  tmp, tpidr_el0
                //   add  x0, x0, tmp
                //
                // This is the instruction sequence that GCC emits for ELF general-dynamic
                // TLS relocations on aarch64.
                // See: https://gcc.godbolt.org/z/e4j7MdErh

                // adrp x0, :tlsdesc:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);
                Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);

                // ldr  tmp, [x0, :tlsdesc_lo12:tlsvar]
                sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);
                Inst::ULoad64 {
                    rd: tmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // add x0, x0, :tlsdesc_lo12:tlsvar
                sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);
                Inst::AluRRImm12 {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    imm12: Imm12::maybe_from_u64(0).unwrap(),
                }
                .emit(sink, emit_info, state);

                // blr tmp
                sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);
                Inst::CallInd {
                    info: crate::isa::Box::new(CallIndInfo {
                        rn: tmp.to_reg(),
                        uses: smallvec![],
                        defs: smallvec![],
                        clobbers: PRegSet::empty(),
                        caller_callconv: CallConv::SystemV,
                        callee_callconv: CallConv::SystemV,
                        callee_pop_size: 0,
                    }),
                }
                .emit(sink, emit_info, state);

                // mrs tmp, tpidr_el0
                sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));

                // add x0, x0, tmp
                Inst::AluRRR {
                    alu_op: ALUOp::Add,
                    size: OperandSize::Size64,
                    rd,
                    rn: rd.to_reg(),
                    rm: tmp.to_reg(),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::MachOTlsGetAddr { ref symbol, rd } => {
                // Each thread-local variable gets a descriptor. The first xword of the
                // descriptor is a pointer to a function that takes the descriptor's
                // address in x0 and returns the address of the thread-local variable
                // in x0.
                //
                // What we want to emit is basically:
                //
                // adrp x0, <label>@TLVPPAGE  ; Load the address of the page of the thread local variable pointer (TLVP)
                // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
                // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
                // blr x1 ; Call the function pointer with the descriptor address in x0
                // ; x0 now contains the TLV address

                assert_eq!(xreg(0), rd.to_reg());
                let rtmp = writable_xreg(1);

                // adrp x0, <label>@TLVPPAGE
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
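                // 0x90000000 is a bare `adrp x0, #0`; the page is filled in
                // by the relocation above.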
                sink.put4(0x90000000);

                // ldr x0, [x0, <label>@TLVPPAGEOFF]
                sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
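                // 0xf9400000 is `ldr x0, [x0]`; the low 12 bits of the
                // offset come from the relocation above.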
                sink.put4(0xf9400000);

                // load [x0] into temp register
                Inst::ULoad64 {
                    rd: rtmp,
                    mem: AMode::reg(rd.to_reg()),
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);

                // call function pointer in temp register
                Inst::CallInd {
                    info: crate::isa::Box::new(CallIndInfo {
                        rn: rtmp.to_reg(),
                        uses: smallvec![],
                        defs: smallvec![],
                        clobbers: PRegSet::empty(),
                        caller_callconv: CallConv::AppleAarch64,
                        callee_callconv: CallConv::AppleAarch64,
                        callee_pop_size: 0,
                    }),
                }
                .emit(sink, emit_info, state);
            }

            &Inst::Unwind { ref inst } => {
                sink.add_unwind(inst.clone());
            }

            &Inst::DummyUse { .. } => {}

            &Inst::StackProbeLoop { start, end, step } => {
                assert!(emit_info.0.enable_probestack());

                // The loop generated here uses `start` as a counter register to
                // count backwards until negating it exceeds `end`. In other
                // words, `start` is the offset from `sp` that we are probing,
                // and `end` is the maximum size we need to probe. The loop
                // looks like:
                //
                //      loop_start:
                //          sub start, start, #step
                //          stur xzr, [sp, start]
                //          cmn start, end
                //          b.gt loop_start
                //      loop_end:
                //
                // Note that this loop cannot use the spilltmp and tmp2
                // registers as those are currently used as the input to this
                // loop when generating the instruction. This means that some
                // more flavorful address modes and lowerings need to be
                // avoided.
                //
                // Perhaps someone more clever than I can figure out how to use
                // `subs` or the like and skip the `cmn`, but I can't figure it
                // out at this time.

                let loop_start = sink.get_label();
                sink.bind_label(loop_start, &mut state.ctrl_plane);

                Inst::AluRRImm12 {
                    alu_op: ALUOp::Sub,
                    size: OperandSize::Size64,
                    rd: start,
                    rn: start.to_reg(),
                    imm12: step,
                }
                .emit(sink, emit_info, state);
                Inst::Store32 {
                    rd: regs::zero_reg(),
                    mem: AMode::RegReg {
                        rn: regs::stack_reg(),
                        rm: start.to_reg(),
                    },
                    flags: MemFlags::trusted(),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: ALUOp::AddS,
                    size: OperandSize::Size64,
                    rd: regs::writable_zero_reg(),
                    rn: start.to_reg(),
                    rm: end,
                }
                .emit(sink, emit_info, state);

                let loop_end = sink.get_label();
                Inst::CondBr {
                    taken: BranchTarget::Label(loop_start),
                    not_taken: BranchTarget::Label(loop_end),
                    kind: CondBrKind::Cond(Cond::Gt),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(loop_end, &mut state.ctrl_plane);
            }
        }

        let end_off = sink.cur_offset();
        debug_assert!(
            (end_off - start_off) <= Inst::worst_case_size()
                || matches!(self, Inst::EmitIsland { .. }),
            "Worst case size exceeded for {:?}: {}",
            self,
            end_off - start_off
        );

        state.clear_post_insn();
    }

    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

fn emit_return_call_common_sequence(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo,
) {
    for inst in
        AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout())
    {
        inst.emit(sink, emit_info, state);
    }

    let setup_area_size = state.frame_layout().setup_area_size;
    if setup_area_size > 0 {
        // N.B.: sp is already adjusted to the appropriate place by the
        // clobber-restore code (which also frees the fixed frame). Hence, there
        // is no need for the usual `mov sp, fp` here.

        // `ldp fp, lr, [sp], #16`
        Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::SPPostIndexed {
                // TODO: we could fold the increment for incoming_args_diff here, as long as that
                // value is less than 502*8, by adding it to `setup_area_size`.
                // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
                simm7: SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap(),
            },
            flags: MemFlags::trusted(),
        }
        .emit(sink, emit_info, state);
    }

    // Adjust SP to account for the possible over-allocation in the prologue.
    let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
    if incoming_args_diff > 0 {
        for inst in
            AArch64MachineDeps::gen_sp_reg_adjust(i32::try_from(incoming_args_diff).unwrap())
        {
            inst.emit(sink, emit_info, state);
        }
    }

    if let Some(key) = info.key {
        sink.put4(key.enc_auti_hint());
    }
}