cranelift_codegen/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This also allows us
//! to adjust the block layout prior to final binary emission, if we want.
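//!
//! For example, every lowered block ends in one of these shapes (schematic
//! only; the instruction syntax is illustrative, not any particular ISA):
//!
//! ```text
//! block5:
//!   inst ...
//!   cond_br block7, block6   ;; conditional branch: two targets
//! block6:
//!   inst ...
//!   jump block8              ;; unconditional branch: one target
//! block8:
//!   inst ...
//!   ret                      ;; return: no targets
//! ```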
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::ir::pcc::*;
use crate::ir::{self, types, Constant, ConstantData, ValueLabel};
use crate::machinst::*;
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::CodegenError;
use crate::{LabelValueLoc, ValueLocRange};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstRange, MachineEnv, Operand,
    OperandConstraint, OperandKind, PRegSet, RegClass,
};
use rustc_hash::FxHashMap;

use core::mem::take;
use cranelift_entity::{entity_impl, Keys};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Extension trait for `InsnIndex` to allow conversion to a
/// `BackwardsInsnIndex`.
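///
/// The conversion simply mirrors an index across the instruction sequence:
/// with `num_insts = 5`, forward index 0 maps to backwards index 4
/// (`5 - 0 - 1`), and forward index 4 maps to backwards index 0.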
trait ToBackwardsInsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
}

impl ToBackwardsInsnIndex for InsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
        BackwardsInsnIndex::new(num_insts - self.index() - 1)
    }
}

/// An index referring to an instruction in the VCode while the VCode is still
/// in the backwards order used during construction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "enable-serde",
    derive(::serde::Serialize, ::serde::Deserialize)
)]
pub struct BackwardsInsnIndex(InsnIndex);

impl BackwardsInsnIndex {
    /// Create a new backwards instruction index.
    pub fn new(i: usize) -> Self {
        BackwardsInsnIndex(InsnIndex::new(i))
    }
}

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering. This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`. `emit` in turn does not
/// modify the vcode, but produces an `EmitResult`, which contains the
/// machine code itself, and the associated disassembly and/or
/// metadata as requested.
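///
/// A sketch of the overall flow (simplified; `run_regalloc` is a placeholder
/// for the caller's regalloc2 invocation, not an actual function here):
///
/// ```ignore
/// let vcode = builder.build(vregs);
/// let regalloc_output = run_regalloc(&vcode)?;
/// let result = vcode.emit(&regalloc_output, want_disasm, &flags, ctrl_plane);
/// ```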
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// A map from backwards instruction index to the user stack map for that
    /// instruction.
    ///
    /// This is a sparse side table that only has entries for instructions that
    /// are safepoints, and only for a subset of those that have an associated
    /// user stack map.
    user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Ranges,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Ranges,

    /// Block successors: index range in the `block_succs` list.
    block_succ_range: Ranges,

    /// Block successor lists, concatenated into one vec. The
    /// `block_succ_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// successors.
    block_succs: Vec<regalloc2::Block>,

    /// Block predecessors: index range in the `block_preds` list.
    block_pred_range: Ranges,

    /// Block predecessor lists, concatenated into one vec. The
    /// `block_pred_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// predecessors.
    block_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Ranges,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor. We flatten those three dimensions
    /// into this 1D vec, then store index ranges in two levels of
    /// indirection.
    ///
    /// Indexed by the ranges in `branch_block_arg_range`.
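    ///
    /// Illustrative two-level lookup (cf. `branch_blockparams` below): for
    /// block `b` and successor index `s`, `branch_block_arg_succ_range.get(b)`
    /// gives the range of per-successor entries for `b`; its `s`-th entry
    /// indexes `branch_block_arg_range`, which in turn gives the (start, end)
    /// range of the actual argument vregs within this list.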
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Ranges,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Ranges,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// Constants.
    pub(crate) constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    /// The set of ABI signatures referenced by this function.
    pub(crate) sigs: SigSet,

    /// Facts on VRegs, for proof-carrying code verification.
    facts: Vec<Option<Fact>>,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBufferFinalized<Stencil>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `machine_code_cfg_info` is enabled.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// Final length of function body.
    pub func_body_len: CodeOffset,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Offsets of sized stackslots.
    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,

    /// Offsets of dynamic stackslots.
    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,

    /// Stack frame size.
    pub frame_size: u32,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
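///
/// A sketch of the expected (backward) build pattern; the instruction names
/// and arguments are illustrative only:
///
/// ```ignore
/// // Within each block, instructions are pushed in reverse program
/// // order; successors are registered as branches are lowered.
/// builder.push(terminator, loc);
/// builder.add_succ(succ_block, &branch_args);
/// builder.push(earlier_inst, loc);
/// builder.end_bb();
/// // ... remaining blocks ...
/// let vcode = builder.build(vregs); // reverses into forward order
/// ```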
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
    /// this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
    ) -> Self {
        let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);

        VCodeBuilder {
            vcode,
            direction,
            debug_info: FxHashMap::default(),
        }
    }

    /// Initialize the ABI object's return-value area, allocating
    /// temporary vregs as needed.
    pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
        self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object mutably.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

    /// Access the signature set.
    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

    /// Access the signature set mutably.
    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) {
        let end_idx = self.vcode.insts.len();
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push_end(end_idx);
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode.block_succ_range.push_end(succ_end);
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode.block_params_range.push_end(block_params_end);
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode
            .branch_block_arg_succ_range
            .push_end(branch_block_arg_succ_end);
    }

    /// Add a block parameter (incoming blockparam vreg) to the current block.
    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode.branch_block_arg_range.push_end(end);
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(loc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Add a debug value label to a register.
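    ///
    /// A worked example of the bottom-to-top bookkeeping (indices are
    /// illustrative): if `label` was last recorded ending at index 8 and 12
    /// instructions have been emitted so far, this pushes the range `(8, 12)`
    /// for `reg`; `reverse_and_finalize` later flips such ranges into forward
    /// order.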
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // We'll fix up labels in reverse(). Because we're generating
        // code bottom-to-top, the liverange of the label goes *from*
        // the last index at which it was defined (or 0, which is the
        // end of the eventual function) *to* just this instruction,
        // and no further.
        let inst = InsnIndex::new(self.vcode.insts.len());
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, inst, reg.into()));
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

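    /// Compute the predecessor tables (`block_pred_range` and `block_preds`)
    /// from the successor tables via a linear-time counting sort.
    ///
    /// Illustrative example: given successor lists `b0 -> [b1, b2]` and
    /// `b1 -> [b2]`, this produces predecessor lists `b0 -> []`,
    /// `b1 -> [b0]`, and `b2 -> [b0, b1]`.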
    fn compute_preds_from_succs(&mut self) {
        // Do a linear-time counting sort: first determine how many
        // times each block appears as a successor.
        let mut starts = vec![0u32; self.vcode.num_blocks()];
        for succ in &self.vcode.block_succs {
            starts[succ.index()] += 1;
        }

        // Determine for each block the starting index where that
        // block's predecessors should go. This is equivalent to the
        // ranges we need to store in block_pred_range.
        self.vcode.block_pred_range.reserve(starts.len());
        let mut end = 0;
        for count in starts.iter_mut() {
            let start = end;
            end += *count;
            *count = start;
            self.vcode.block_pred_range.push_end(end as usize);
        }
        let end = end as usize;
        debug_assert_eq!(end, self.vcode.block_succs.len());

        // Walk over the successors again, this time grouped by
        // predecessor, and push the predecessor at the current
        // starting position of each of its successors. We build
        // each group of predecessors in whatever order Ranges::iter
        // returns them; regalloc2 doesn't care.
        self.vcode.block_preds.resize(end, BlockIndex::invalid());
        for (pred, range) in self.vcode.block_succ_range.iter() {
            let pred = BlockIndex::new(pred);
            for succ in &self.vcode.block_succs[range] {
                let pos = &mut starts[succ.index()];
                self.vcode.block_preds[*pos as usize] = pred;
                *pos += 1;
            }
        }
        debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
    }

    /// Called once, when a build in Backward order is complete, to
    /// perform the overall reversal (into final forward order) and
    /// finalize metadata accordingly.
    fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
        let n_insts = self.vcode.insts.len();
        if n_insts == 0 {
            return;
        }

        // Reverse the per-block and per-inst sequences.
        self.vcode.block_ranges.reverse_index();
        self.vcode.block_ranges.reverse_target(n_insts);
        // block_params_range is indexed by block (and blocks were
        // traversed in reverse) so we reverse it; but block-param
        // sequences in the concatenated vec can remain in reverse
        // order (it is effectively an arena of arbitrarily-placed
        // referenced sequences).
        self.vcode.block_params_range.reverse_index();
        // Likewise, we reverse block_succ_range, but the block_succ
        // concatenated array can remain as-is.
        self.vcode.block_succ_range.reverse_index();
        self.vcode.insts.reverse();
        self.vcode.srclocs.reverse();
        // Likewise, branch_block_arg_succ_range is indexed by block
        // so must be reversed.
        self.vcode.branch_block_arg_succ_range.reverse_index();

        // To translate an instruction index *endpoint* in reversed
        // order to forward order, compute `n_insts - i`.
        //
        // Why not `n_insts - 1 - i`? That would be correct to
        // translate an individual instruction index (for ten insts 0
        // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
        // 0). But for the usual inclusive-start, exclusive-end range
        // idiom, inclusive starts become exclusive ends and
        // vice-versa, so e.g. an (inclusive) start of 0 becomes an
        // (exclusive) end of 10.
        let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());

        // Generate debug-value labels based on per-label maps.
        for (label, tuples) in &self.debug_info {
            for &(start, end, vreg) in tuples {
                let vreg = vregs.resolve_vreg_alias(vreg);
                let fwd_start = translate(end);
                let fwd_end = translate(start);
                self.vcode
                    .debug_value_labels
                    .push((vreg, fwd_start, fwd_end, label.as_u32()));
            }
        }

        // Now sort debug value labels by VReg, as required
        // by regalloc2.
        self.vcode
            .debug_value_labels
            .sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
    }

    fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
        let allocatable = PRegSet::from(self.vcode.machine_env());
        for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
            // Push operands from the instruction onto the operand list.
            //
            // We rename through the vreg alias table as we collect
            // the operands. This is better than a separate post-pass
            // over operands, because it has more cache locality:
            // operands only need to pass through L1 once. This is
            // also better than renaming instructions'
            // operands/registers while lowering, because here we only
            // need to do the `match` over the instruction to visit
            // its register fields (which is slow, branchy code) once.

            let mut op_collector =
                OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
                    vregs.resolve_vreg_alias(vreg)
                });
            insn.get_operands(&mut op_collector);
            let (ops, clobbers) = op_collector.finish();
            self.vcode.operand_ranges.push_end(ops);

            if clobbers != PRegSet::default() {
                self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
            }

            if let Some((dst, src)) = insn.is_move() {
                // We should never see non-virtual registers present in move
                // instructions.
                assert!(
                    src.is_virtual(),
                    "the real register {:?} was used as the source of a move instruction",
                    src
                );
                assert!(
                    dst.to_reg().is_virtual(),
                    "the real register {:?} was used as the destination of a move instruction",
                    dst.to_reg()
                );
            }
        }

        // Translate blockparam args via the vreg aliases table as well.
        for arg in &mut self.vcode.branch_block_args {
            let new_arg = vregs.resolve_vreg_alias(*arg);
            trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
            *arg = new_arg;
        }
    }

    /// Build the final VCode.
    pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
        self.vcode.vreg_types = take(&mut vregs.vreg_types);
        self.vcode.facts = take(&mut vregs.facts);
        self.vcode.reftyped_vregs = take(&mut vregs.reftyped_vregs);

        if self.direction == VCodeBuildDirection::Backward {
            self.reverse_and_finalize(&vregs);
        }
        self.collect_operands(&vregs);

        // Apply register aliases to the `reftyped_vregs` list since this list
        // will be returned directly to `regalloc2` eventually and all
        // operands/results of instructions will use the alias-resolved vregs
        // from `regalloc2`'s perspective.
        //
        // Also note that `reftyped_vregs` can't have duplicates, so after the
        // aliases are applied duplicates are removed.
        for reg in self.vcode.reftyped_vregs.iter_mut() {
            *reg = vregs.resolve_vreg_alias(*reg);
        }
        self.vcode.reftyped_vregs.sort();
        self.vcode.reftyped_vregs.dedup();

        self.compute_preds_from_succs();
        self.vcode.debug_value_labels.sort_unstable();

        // At this point, nothing in the vcode should mention any
        // VReg which has been aliased. All the appropriate rewriting
        // should have happened above. Just to be sure, let's
        // double-check each field which has vregs.
        // Note: can't easily check vcode.insts, resolved in collect_operands.
        // Operands are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
        // Currently block params are never aliased to another vreg.
        vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
        // Branch block args are resolved in collect_operands.
        vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
        // Reftyped vregs are resolved above in this function.
        vregs.debug_assert_no_vreg_aliases(self.vcode.reftyped_vregs.iter().copied());
        // Debug value labels are resolved in reverse_and_finalize.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
        );
        // Facts are resolved eagerly during set_vreg_alias.
        vregs.debug_assert_no_vreg_aliases(
            self.vcode
                .facts
                .iter()
                .zip(&vregs.vreg_types)
                .enumerate()
                .filter(|(_, (fact, _))| fact.is_some())
                .map(|(vreg, (_, &ty))| {
                    let (regclasses, _) = I::rc_for_type(ty).unwrap();
                    VReg::new(vreg, regclasses[0])
                }),
        );

        self.vcode
    }

    /// Add a user stack map for the associated instruction.
    pub fn add_user_stack_map(
        &mut self,
        inst: BackwardsInsnIndex,
        entries: &[ir::UserStackMapEntry],
    ) {
        let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
        let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
        debug_assert!(old_entry.is_none());
    }
}

/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
    ty == types::R64 || ty == types::R32
}

/// Sentinel code offset used in `inst_offsets` for instructions whose
/// offset was not recorded.
const NO_INST_OFFSET: CodeOffset = u32::MAX;

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
    ) -> Self {
        let n_blocks = block_order.lowered_order().len();
        VCode {
            sigs,
            vreg_types: vec![],
            insts: Vec::with_capacity(10 * n_blocks),
            user_stack_maps: FxHashMap::default(),
            operands: Vec::with_capacity(30 * n_blocks),
            operand_ranges: Ranges::with_capacity(10 * n_blocks),
            clobbers: FxHashMap::default(),
            srclocs: Vec::with_capacity(10 * n_blocks),
            entry: BlockIndex::new(0),
            block_ranges: Ranges::with_capacity(n_blocks),
            block_succ_range: Ranges::with_capacity(n_blocks),
            block_succs: Vec::with_capacity(n_blocks),
            block_pred_range: Ranges::default(),
            block_preds: Vec::new(),
            block_params_range: Ranges::with_capacity(n_blocks),
            block_params: Vec::with_capacity(5 * n_blocks),
            branch_block_args: Vec::with_capacity(10 * n_blocks),
            branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
            branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
            block_order,
            abi,
            emit_info,
            reftyped_vregs: vec![],
            constants,
            debug_value_labels: vec![],
            facts: vec![],
        }
    }

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    pub fn machine_env(&self) -> &MachineEnv {
        self.abi.machine_env(&self.sigs)
    }

    /// Get the number of blocks. Block indices will be in the range
    /// `0 .. self.num_blocks()`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// The number of lowered instructions.
    pub fn num_insts(&self) -> usize {
        self.insts.len()
    }

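    /// Compute the set of physical registers clobbered by this function
    /// body, per the regalloc results: the destination of every
    /// regalloc-inserted move, plus, for each instruction that is "included
    /// in clobbers", every register allocated to a `Def` operand and the
    /// instruction's explicit clobber set.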
    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
        let mut clobbered = PRegSet::default();

        // All moves are included in clobbers.
        for (_, Edit::Move { to, .. }) in &regalloc.edits {
            if let Some(preg) = to.as_reg() {
                clobbered.add(preg);
            }
        }

        for (i, range) in self.operand_ranges.iter() {
            // Skip this instruction if not "included in clobbers" as
            // per the MachInst. (Some backends use this to implement
            // ABI specifics; e.g., excluding calls of the same ABI as
            // the current function from clobbers, because by
            // definition everything clobbered by the call can be
            // clobbered by this function without saving as well.)
            if !self.insts[i].is_included_in_clobbers() {
                continue;
            }

            let operands = &self.operands[range.clone()];
            let allocs = &regalloc.allocs[range];
            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
                if operand.kind() == OperandKind::Def {
                    if let Some(preg) = alloc.as_reg() {
                        clobbered.add(preg);
                    }
                }
            }

            // Also add explicitly-clobbered registers.
            if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
                clobbered.union_from(inst_clobbered);
            }
        }

        clobbered
            .into_iter()
            .map(|preg| Writable::from_reg(RealReg::from(preg)))
            .collect()
    }

    /// Emit the instructions to a `MachBuffer`, containing fixed-up
    /// code and external reloc/trap/etc. records ready for use. Takes
    /// the regalloc results as well.
    ///
    /// Returns the machine code itself, and optionally metadata
    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
    /// is consumed by the emission process.
    pub fn emit(
        mut self,
        regalloc: &regalloc2::Output,
        want_disasm: bool,
        flags: &settings::Flags,
        ctrl_plane: &mut ControlPlane,
    ) -> EmitResult
    where
        I: VCodeInst,
    {
        // To write into disasm string.
        use core::fmt::Write;

        let _tt = timing::vcode_emit();
        let mut buffer = MachBuffer::new();
        let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

        // The first M MachLabels are reserved for block indices.
        buffer.reserve_labels_for_blocks(self.num_blocks());

        // Register all allocated constants with the `MachBuffer` to ensure that
        // any references to the constants during instructions can be handled
        // correctly.
        buffer.register_constants(&self.constants);

        // Construct the final order we emit code in: cold blocks at the end.
        let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
        let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            if self.block_order.is_cold(block) {
                cold_blocks.push(block);
            } else {
                final_order.push(block);
            }
        }
        final_order.extend(cold_blocks.clone());

        // Compute/save info we need for the prologue: clobbers and
        // number of spillslots. (`emit` takes `self` by value, so we
        // are free to mutate `self.abi` as we generate the prologue
        // and set other info.)
        let clobbers = self.compute_clobbers(regalloc);
        self.abi
            .compute_frame_layout(&self.sigs, regalloc.num_spillslots, clobbers);

        // Emit blocks.
        let mut cur_srcloc = None;
        let mut last_offset = None;
        let mut inst_offsets = vec![];
        let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));

        let mut disasm = String::new();

        if !self.debug_value_labels.is_empty() {
            inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
        }

        // Count edits per block ahead of time; this is needed for
        // lookahead island emission. (We could derive it per-block
        // with binary search in the edit list, but it's more
        // efficient to do it in one pass here.)
        let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
        let mut edit_idx = 0;
        for block in 0..self.num_blocks() {
            let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
            let start_edit_idx = edit_idx;
            while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
                edit_idx += 1;
            }
            let end_edit_idx = edit_idx;
            ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
        }

        let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
        let mut bb_padding = match flags.bb_padding_log2_minus_one() {
            0 => Vec::new(),
            n => vec![0; 1 << (n - 1)],
        };
        let mut total_bb_padding = 0;

        for (block_order_idx, &block) in final_order.iter().enumerate() {
            trace!("emitting block {:?}", block);

            // Call the new-block hook on the emission state.
            state.on_new_block();

            // Emit NOPs to align the block.
            let new_offset = I::align_basic_block(buffer.cur_offset());
            while new_offset > buffer.cur_offset() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
                nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
            }
            assert_eq!(buffer.cur_offset(), new_offset);

            let do_emit = |inst: &I,
                           disasm: &mut String,
                           buffer: &mut MachBuffer<I>,
                           state: &mut I::State| {
                if want_disasm && !inst.is_args() {
                    let mut s = state.clone();
                    writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();
                }
                inst.emit(buffer, &self.emit_info, state);
            };

            // Is this the first block? Emit the prologue directly if so.
            if block == self.entry {
                trace!(" -> entry block");
                buffer.start_srcloc(Default::default());
                for inst in &self.abi.gen_prologue() {
                    do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                }
                buffer.end_srcloc();
            }

            // Now emit the regular block body.

            buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());

            if want_disasm {
                writeln!(&mut disasm, "block{}:", block.index()).unwrap();
            }

            if flags.machine_code_cfg_info() {
                // Track BB starts. If we have backed up due to MachBuffer
                // branch opts, note that the removed blocks were removed.
                let cur_offset = buffer.cur_offset();
                if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
                    for i in (0..bb_starts.len()).rev() {
                        if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
                            break;
                        }
                        bb_starts[i] = None;
                    }
                }
                bb_starts.push(Some(cur_offset));
                last_offset = Some(cur_offset);
            }

            if let Some(block_start) = I::gen_block_start(
                self.block_order.is_indirect_branch_target(block),
                is_forward_edge_cfi_enabled,
            ) {
                do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
            }

            for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
                match inst_or_edit {
                    InstOrEdit::Inst(iix) => {
                        if !self.debug_value_labels.is_empty() {
                            // If we need to produce debug info,
                            // record the offset of each instruction
                            // so that we can translate value-label
                            // ranges to machine-code offsets.

                            // Cold blocks violate monotonicity
                            // assumptions elsewhere (that
                            // instructions in inst-index order are in
                            // order in machine code), so we omit
                            // their offsets here. Value-label range
                            // generation below will skip empty ranges
                            // and ranges with to-offsets of zero.
                            if !self.block_order.is_cold(block) {
                                inst_offsets[iix.index()] = buffer.cur_offset();
                            }
                        }

                        // Update the srcloc at this point in the buffer.
                        let srcloc = self.srclocs[iix.index()];
                        if cur_srcloc != Some(srcloc) {
                            if cur_srcloc.is_some() {
                                buffer.end_srcloc();
                            }
                            buffer.start_srcloc(srcloc);
                            cur_srcloc = Some(srcloc);
                        }

                        // If this is a safepoint, compute a stack map
                        // and pass it to the emit state.
                        let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
                            let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
                            // Find the contiguous range of
                            // (progpoint, allocation) safepoint slot
                            // records in `regalloc.safepoint_slots`
                            // for this instruction index.
                            let safepoint_slots_start = regalloc
                                .safepoint_slots
                                .binary_search_by(|(progpoint, _alloc)| {
                                    if progpoint.inst() >= iix {
                                        std::cmp::Ordering::Greater
                                    } else {
                                        std::cmp::Ordering::Less
                                    }
                                })
                                .unwrap_err();

                            for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..]
                                .iter()
                                .take_while(|(progpoint, _)| progpoint.inst() == iix)
                            {
                                let slot = alloc.as_stack().unwrap();
                                safepoint_slots.push(slot);
                            }

                            let stack_map = if safepoint_slots.is_empty() {
                                None
                            } else {
                                Some(
                                    self.abi
                                        .spillslots_to_stack_map(&safepoint_slots[..], &state),
                                )
                            };

                            let (user_stack_map, user_stack_map_disasm) = {
                                // The `user_stack_maps` table is keyed by
                                // reverse instruction index, so we must flip
                                // the index. We can't put this into a helper
                                // method due to borrowck issues because parts
                                // of `self` are borrowed mutably elsewhere in
                                // this function.
                                let index = iix.to_backwards_insn_index(self.num_insts());
                                let user_stack_map = self.user_stack_maps.remove(&index);
                                let user_stack_map_disasm =
                                    user_stack_map.as_ref().map(|m| format!(" ; {m:?}"));
                                (user_stack_map, user_stack_map_disasm)
                            };

                            state.pre_safepoint(stack_map, user_stack_map);

                            user_stack_map_disasm
                        } else {
                            None
                        };

                        // If the instruction we are about to emit is
                        // a return, place an epilogue at this point
                        // (and don't emit the return; the actual
                        // epilogue will contain it).
                        if self.insts[iix.index()].is_term() == MachTerminator::Ret {
                            for inst in self.abi.gen_epilogue() {
                                do_emit(&inst, &mut disasm, &mut buffer, &mut state);
                            }
                        } else {
                            // Update the operands for this inst using the
                            // allocations from the regalloc result.
                            let mut allocs = regalloc.inst_allocs(iix).iter();
                            self.insts[iix.index()].get_operands(
                                &mut |reg: &mut Reg, constraint, _kind, _pos| {
                                    let alloc = allocs
                                        .next()
                                        .expect("enough allocations for all operands")
                                        .as_reg()
                                        .expect("only register allocations, not stack allocations")
                                        .into();

                                    if let OperandConstraint::FixedReg(rreg) = constraint {
                                        debug_assert_eq!(Reg::from(rreg), alloc);
                                    }
                                    *reg = alloc;
                                },
                            );
                            debug_assert!(allocs.next().is_none());

                            // Emit the instruction!
                            do_emit(
                                &self.insts[iix.index()],
                                &mut disasm,
                                &mut buffer,
                                &mut state,
                            );
                            if let Some(stack_map_disasm) = stack_map_disasm {
                                disasm.push_str(&stack_map_disasm);
                                disasm.push('\n');
                            }
                        }
                    }

                    InstOrEdit::Edit(Edit::Move { from, to }) => {
                        // Create a move/spill/reload instruction and
                        // immediately emit it.
                        match (from.as_reg(), to.as_reg()) {
                            (Some(from), Some(to)) => {
                                // Reg-to-reg move.
                                let from_rreg = Reg::from(from);
                                let to_rreg = Writable::from_reg(Reg::from(to));
                                debug_assert_eq!(from.class(), to.class());
                                let ty = I::canonical_type_for_rc(from.class());
                                let mv = I::gen_move(to_rreg, from_rreg, ty);
                                do_emit(&mv, &mut disasm, &mut buffer, &mut state);
                            }
                            (Some(from), None) => {
                                // Spill from register to spillslot.
                                let to = to.as_stack().unwrap();
                                let from_rreg = RealReg::from(from);
                                let spill = self.abi.gen_spill(to, from_rreg);
                                do_emit(&spill, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, Some(to)) => {
                                // Load from spillslot to register.
                                let from = from.as_stack().unwrap();
                                let to_rreg = Writable::from_reg(RealReg::from(to));
                                let reload = self.abi.gen_reload(to_rreg, from);
                                do_emit(&reload, &mut disasm, &mut buffer, &mut state);
                            }
                            (None, None) => {
                                panic!("regalloc2 should have eliminated stack-to-stack moves!");
                            }
                        }
                    }
                }
            }

            if cur_srcloc.is_some() {
                buffer.end_srcloc();
                cur_srcloc = None;
            }

            // Do we need an island? Get the worst-case size of the next BB, add
            // it to the optional padding behind the block, and pass this to the
            // `MachBuffer` to determine if an island is necessary.
            let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
                let next_block = final_order[block_order_idx + 1];
                let next_block_range = self.block_ranges.get(next_block.index());
                let next_block_size = next_block_range.len() as u32;
                let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
                I::worst_case_size() * (next_block_size + next_block_ra_insertions)
            } else {
                0
            };
            let padding = if bb_padding.is_empty() {
                0
            } else {
                bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
            };
            if buffer.island_needed(padding + worst_case_next_bb) {
                buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
            }

            // Insert padding, if configured, to stress the `MachBuffer`'s
            // relocation and island calculations.
            //
            // Padding can get quite large during fuzzing, though, so place a
            // total cap on it: once a per-function threshold is exceeded, the
            // padding is turned back down to zero. This avoids a small-ish
            // test case generating a GB+ memory footprint in Cranelift, for
            // example.
            if !bb_padding.is_empty() {
                buffer.put_data(&bb_padding);
                buffer.align_to(I::LabelUse::ALIGN);
                total_bb_padding += bb_padding.len();
                if total_bb_padding > (150 << 20) {
                    bb_padding = Vec::new();
                }
            }
        }

        debug_assert!(
            self.user_stack_maps.is_empty(),
            "any stack maps should have been consumed by instruction emission, still have: {:#?}",
            self.user_stack_maps,
        );

        // Do any optimizations on branches at tail of buffer, as if we had
        // bound one last label.
        buffer.optimize_branches(ctrl_plane);

        // Emission state is not needed anymore; move the control plane back out.
        *ctrl_plane = state.take_ctrl_plane();

        let func_body_len = buffer.cur_offset();

        // Create `bb_edges` and final (filtered) `bb_starts`.
        let mut bb_edges = vec![];
        let mut bb_offsets = vec![];
        if flags.machine_code_cfg_info() {
            for block in 0..self.num_blocks() {
                if bb_starts[block].is_none() {
                    // Block was deleted by MachBuffer; skip.
                    continue;
                }
                let from = bb_starts[block].unwrap();

                bb_offsets.push(from);
                // Resolve each `succ` label and add edges.
                let succs = self.block_succs(BlockIndex::new(block));
                for &succ in succs.iter() {
                    let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
                    bb_edges.push((from, to));
                }
            }
        }

        self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
        let value_labels_ranges =
            self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
        let frame_size = self.abi.frame_size();

        EmitResult {
            buffer: buffer.finish(&self.constants, ctrl_plane),
            bb_offsets,
            bb_edges,
            func_body_len,
            disasm: if want_disasm { Some(disasm) } else { None },
            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
            value_labels_ranges,
            frame_size,
        }
    }

    fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
        if self.debug_value_labels.is_empty() {
            return;
        }

        // During emission, branch removal can make offsets of instructions incorrect.
        // Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
        // It will be recorded as (say):    [30]  [34]  [38]  [42]  [<would be 46>]
        // When the jumps get removed we are left with (in "inst_offsets"):
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [38]  [42]  [34]
        // Which violates the monotonicity invariant. This method sets offsets of these
        // removed instructions so as to make them appear zero-sized:
        // [insi][jmp0][jmp1][jmp2][insj][...]
        // [30]  [34]  [34]  [34]  [34]
        //
        let mut next_offset = func_body_len;
        for inst_index in (0..(inst_offsets.len() - 1)).rev() {
            let inst_offset = inst_offsets[inst_index];

            // Not all instructions get their offsets recorded.
            if inst_offset == NO_INST_OFFSET {
                continue;
            }

            if inst_offset > next_offset {
                trace!(
                    "Fixing code offset of the removed Inst {}: {} -> {}",
                    inst_index,
                    inst_offset,
                    next_offset
                );
                inst_offsets[inst_index] = next_offset;
                continue;
            }

            next_offset = inst_offset;
        }
    }

    fn compute_value_labels_ranges(
        &self,
        regalloc: &regalloc2::Output,
        inst_offsets: &[CodeOffset],
        func_body_len: u32,
    ) -> ValueLabelsRanges {
        if self.debug_value_labels.is_empty() {
            return ValueLabelsRanges::default();
        }

        let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
        for &(label, from, to, alloc) in &regalloc.debug_locations {
            let ranges = value_labels_ranges
                .entry(ValueLabel::from_u32(label))
                .or_insert_with(|| vec![]);
            let from_offset = inst_offsets[from.inst().index()];
            let to_offset = if to.inst().index() == inst_offsets.len() {
                func_body_len
            } else {
                inst_offsets[to.inst().index()]
            };

            // Empty ranges or unavailable offsets can happen
            // due to cold blocks and branch removal (see above).
            if from_offset == NO_INST_OFFSET
                || to_offset == NO_INST_OFFSET
                || from_offset == to_offset
            {
                continue;
            }

            let loc = if let Some(preg) = alloc.as_reg() {
                LabelValueLoc::Reg(Reg::from(preg))
            } else {
                let slot = alloc.as_stack().unwrap();
                let slot_offset = self.abi.get_spillslot_offset(slot);
                let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
                let caller_sp_to_cfa_offset =
                    crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
                // NOTE: this is a negative offset because it's relative to the caller's SP
                let cfa_to_sp_offset =
                    -((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
                LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
            };

            // ValueLocRanges are recorded by *instruction-end
            // offset*. `from_offset` is the *start* of the
            // instruction; that is the same as the end of another
            // instruction, so we only want to begin coverage once
            // we are past the previous instruction's end.
            let start = from_offset + 1;

            // Likewise, `end` is exclusive, but we want to
            // *include* the end of the last
            // instruction. `to_offset` is the start of the
            // `to`-instruction, which is the exclusive end, i.e.,
            // the first instruction not covered. That
            // instruction's start is the same as the end of the
            // last instruction that is included, so we go one
            // byte further to be sure to include it.
            let end = to_offset + 1;

            // Coalesce adjacent ranges that are for the same location,
            // to minimize output size here and for the consumers.
            if let Some(last_loc_range) = ranges.last_mut() {
                if last_loc_range.loc == loc && last_loc_range.end == start {
                    trace!(
                        "Extending debug range for VL{} in {:?} to {}",
                        label,
                        loc,
                        end
                    );
                    last_loc_range.end = end;
                    continue;
                }
            }

            trace!(
                "Recording debug range for VL{} in {:?}: [Inst {}..Inst {}) [{}..{})",
                label,
                loc,
                from.inst().index(),
                to.inst().index(),
                start,
                end
            );

            ranges.push(ValueLocRange { loc, start, end });
        }

        value_labels_ranges
    }

    /// Get the IR block for a BlockIndex, if one exists.
    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
        self.block_order.lowered_order()[block.index()].orig_block()
    }

    /// Get the type of a VReg.
    pub fn vreg_type(&self, vreg: VReg) -> Type {
        self.vreg_types[vreg.vreg()]
    }

    /// Get the fact, if any, for a given VReg.
    pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
        self.facts[vreg.vreg()].as_ref()
    }

    /// Set the fact for a given VReg.
    pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
        trace!("set fact on {}: {:?}", vreg, fact);
        self.facts[vreg.vreg()] = Some(fact);
    }

    /// Does a given instruction define any facts?
    pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
        self.inst_operands(inst)
            .iter()
            .filter(|o| o.kind() == OperandKind::Def)
            .map(|o| o.vreg())
            .any(|vreg| self.facts[vreg.vreg()].is_some())
    }

    /// Get the user stack map associated with the given forward instruction index.
    pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
        let index = inst.to_backwards_insn_index(self.num_insts());
        self.user_stack_maps.get(&index)
    }
}

impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
    type Output = I;
    fn index(&self, idx: InsnIndex) -> &Self::Output {
        &self.insts[idx.index()]
    }
}

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    fn entry_block(&self) -> BlockIndex {
        self.entry
    }

    fn block_insns(&self, block: BlockIndex) -> InstRange {
        let range = self.block_ranges.get(block.index());
        InstRange::forward(InsnIndex::new(range.start), InsnIndex::new(range.end))
    }

    fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_succ_range.get(block.index());
        &self.block_succs[range]
    }

    fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_pred_range.get(block.index());
        &self.block_preds[range]
    }

    fn block_params(&self, block: BlockIndex) -> &[VReg] {
        // As a special case we don't return block params for the entry block, as all the arguments
        // will be defined by the `Inst::Args` instruction.
        if block == self.entry {
            return &[];
        }

        let range = self.block_params_range.get(block.index());
        &self.block_params[range]
    }

    fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
        let succ_range = self.branch_block_arg_succ_range.get(block.index());
        debug_assert!(succ_idx < succ_range.len());
        let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
        &self.branch_block_args[branch_block_args]
    }

    fn is_ret(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            // We treat blocks terminated by an unconditional trap like a return for regalloc.
            MachTerminator::None => self.insts[insn.index()].is_trap(),
            MachTerminator::Ret | MachTerminator::RetCall => true,
            MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false,
        }
    }

    fn is_branch(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            MachTerminator::Cond | MachTerminator::Uncond | MachTerminator::Indirect => true,
            _ => false,
        }
    }

    fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
        self.insts[insn.index()].is_safepoint()
    }

    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
        let range = self.operand_ranges.get(insn.index());
        &self.operands[range]
    }

    fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
        self.clobbers.get(&insn).cloned().unwrap_or_default()
    }

    fn num_vregs(&self) -> usize {
        self.vreg_types.len()
    }

    fn reftype_vregs(&self) -> &[VReg] {
        &self.reftyped_vregs
    }

    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
        &self.debug_value_labels
    }

    fn spillslot_size(&self, regclass: RegClass) -> usize {
        self.abi.get_spillslot_size(regclass) as usize
    }

    fn allow_multiple_vreg_defs(&self) -> bool {
        // At least the s390x backend requires this, because the
        // `Loop` pseudo-instruction aggregates all Operands so pinned
        // vregs (RealRegs) may occur more than once.
        true
    }
}

impl<I: VCodeInst> fmt::Debug for VRegAllocator<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VRegAllocator {{")?;

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for (vreg, fact) in self.facts.iter().enumerate() {
            if let Some(fact) = fact {
                writeln!(f, " v{} ! {}", vreg, fact)?;
            }
        }

        writeln!(f, "}}")
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, " Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(
                f,
                "Block {}({:?}):",
                block.index(),
                self.block_params(block)
            )?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, " (original IR block: {})", bb)?;
            }
            for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
                writeln!(
                    f,
                    " (successor: Block {}({:?}))",
                    succ.index(),
                    self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
                )?;
            }
            for inst in self.block_ranges.get(block.index()) {
                writeln!(
                    f,
                    " Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&mut state)
                )?;
                if !self.operands.is_empty() {
                    for operand in self.inst_operands(InsnIndex::new(inst)) {
                        if operand.kind() == OperandKind::Def {
                            if let Some(fact) = &self.facts[operand.vreg().vreg()] {
                                writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;
                            }
                        }
                    }
                }
                if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
                    writeln!(f, " {user_stack_map:?}")?;
                }
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Reference-typed `regalloc2::VReg`s. The regalloc requires
    /// these in a dense slice (as opposed to querying the
    /// reftype-status of each vreg) for efficient iteration.
    reftyped_vregs: Vec<VReg>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            reftyped_vregs: vec![],
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
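    ///
    /// A usage sketch (assuming a 64-bit target): allocating for an `I64`
    /// yields a single fresh vreg, while allocating for an `I128` yields a
    /// two-register `ValueRegs`, because `I::rc_for_type` reports one or two
    /// register classes for the type.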
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
            if is_reftype(reg_ty) {
                self.reftyped_vregs.push(vreg.into());
            }
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }

    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }

    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }
1607
1608 /// Rewrite any mention of `from` into `to`.
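    ///
    /// The alias target is eagerly resolved through any existing
    /// aliases first, so the stored mapping always points at a chain
    /// head; aliasing a vreg (transitively) to itself is rejected by
    /// the assert below.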
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
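        //
        // For example (with hypothetical vreg numbers): after
        // `set_vreg_alias(v1, v2)` and then `set_vreg_alias(v2, v3)`,
        // the map holds v1 -> v2 and v2 -> v3, so resolving v1 walks
        // two steps and yields v3.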
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
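    ///
    /// A fact may only be attached when the type fits in a single
    /// vreg; passing `Some(fact)` for a type that splits into
    /// multiple vregs trips the assert below.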
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants::insert]; an intermediate handle, `VCodeConstant`, tracks which constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Second, during the emission phase, the [MachBuffer] assigns a [MachLabel] to each constant so
/// that instructions can refer to the value's memory location. The [MachBuffer] then writes the
/// constant values to the buffer.
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool], [VCodeConstantData::WellKnown], or
    /// [VCodeConstantData::U64], but not [VCodeConstantData::Generated].
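    ///
    /// A sketch of the dedup behavior (not run as a doctest):
    ///
    /// ```ignore
    /// let mut constants = VCodeConstants::with_capacity(1);
    /// let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    /// let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    /// assert_eq!(a, b); // the same handle is returned for the same value
    /// assert_eq!(constants.len(), 1); // only one entry is stored
    /// ```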
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and their associated `VCodeConstantData` inserted
    /// in this structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction
    /// context, which makes it difficult to deduplicate; if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
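    ///
    /// Constants of at most 8 bytes are aligned to 8 bytes; anything
    /// larger is aligned to 16 (e.g., 128-bit vector constants). A
    /// sketch (not run as a doctest):
    ///
    /// ```ignore
    /// assert_eq!(VCodeConstantData::U64([0; 8]).alignment(), 8);
    /// assert_eq!(VCodeConstantData::WellKnown(&[0u8; 16]).alignment(), 16);
    /// ```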
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 {
            8
        } else {
            16
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 24);
        assert_eq!(size_of::<VCodeConstantData>(), 32);
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            24
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}