cranelift_codegen/inline.rs

//! Function inlining infrastructure.
//!
//! This module provides "inlining as a library" to Cranelift users; it does
//! _not_ provide a complete, off-the-shelf inlining solution. Cranelift's
//! compilation context is per-function and does not encompass the full call
//! graph. It does not know which functions are hot and which are cold, which
//! have been marked the equivalent of `#[inline(never)]`, etc. Only the
//! Cranelift user can understand these aspects of the full compilation
//! pipeline, and these things can be very different between (say) Wasmtime and
//! `cg_clif`. Therefore, this module does not attempt to define heuristics for
//! when inlining a particular call is likely beneficial. This module only
//! provides hooks for the Cranelift user to define whether a given call should
//! be inlined or not, and the mechanics to inline a callee into a particular
//! call site when directed to do so by the Cranelift user.
//!
//! The top-level inlining entry point during Cranelift compilation is
//! [`Context::inline`][crate::Context::inline]. It takes an [`Inline`] trait
//! implementation, which is authored by the Cranelift user and directs
//! Cranelift whether to inline a particular call, and, when inlining, gives
//! Cranelift the body of the callee that is to be inlined.
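//!
//! As a sketch, an `Inline` implementation might look like the following. The
//! `InlineEverything` type, its `bodies` store, and the `lookup_body` helper
//! are hypothetical stand-ins for embedder-specific machinery, and the
//! "heuristic" here is simply "inline whenever we have the callee's body":
//!
//! ```ignore
//! // Assumes `Inline`, `InlineCommand`, `ir`, and `Cow` are in scope.
//! struct InlineEverything {
//!     /// However the embedder stores previously-compiled callee bodies.
//!     bodies: Bodies,
//! }
//!
//! impl Inline for InlineEverything {
//!     fn inline(
//!         &mut self,
//!         caller: &ir::Function,
//!         _call_inst: ir::Inst,
//!         _call_opcode: ir::Opcode,
//!         callee: ir::FuncRef,
//!         _call_args: &[ir::Value],
//!     ) -> InlineCommand<'_> {
//!         // `lookup_body` is a hypothetical helper that resolves the
//!         // `FuncRef` to a legalized, verified `ir::Function`, if the
//!         // embedder has one on hand.
//!         match self.bodies.lookup_body(caller, callee) {
//!             Some(f) => InlineCommand::Inline {
//!                 callee: Cow::Borrowed(f),
//!                 visit_callee: false,
//!             },
//!             None => InlineCommand::KeepCall,
//!         }
//!     }
//! }
//!
//! // Then, during compilation of each function:
//! //
//! //     ctx.inline(&mut InlineEverything { bodies })?;
//! ```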

use crate::cursor::{Cursor as _, FuncCursor};
use crate::ir::{self, ExceptionTableData, ExceptionTableItem, InstBuilder as _};
use crate::result::CodegenResult;
use crate::trace;
use crate::traversals::Dfs;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use cranelift_entity::{SecondaryMap, packed_option::PackedOption};
use smallvec::SmallVec;

type SmallValueVec = SmallVec<[ir::Value; 8]>;
type SmallBlockArgVec = SmallVec<[ir::BlockArg; 8]>;
type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;

/// A command directing Cranelift whether or not to inline a particular call.
pub enum InlineCommand<'a> {
    /// Keep the call as-is, out-of-line, and do not inline the callee.
    KeepCall,

    /// Inline the call, using this function as the body of the callee.
    ///
    /// It is the `Inline` implementor's responsibility to ensure that this
    /// function is the correct callee. Providing the wrong function may result
    /// in panics during compilation or incorrect runtime behavior.
    Inline {
        /// The callee function's body.
        callee: Cow<'a, ir::Function>,
        /// Whether to visit any function calls within the callee body after
        /// inlining and consider them for further inlining.
        visit_callee: bool,
    },
}

/// A trait for directing Cranelift whether to inline a particular call or not.
///
/// Used in combination with the [`Context::inline`][crate::Context::inline]
/// method.
pub trait Inline {
    /// A hook invoked for each direct call instruction in a function, whose
    /// result determines whether Cranelift should inline a given call.
    ///
    /// The Cranelift user is responsible for defining their own heuristics and
    /// deciding whether inlining the call is beneficial.
    ///
    /// When returning a function and directing Cranelift to inline its body
    /// into the call site, the `Inline` implementor must ensure the following:
    ///
    /// * The returned function's signature exactly matches the `callee`
    ///   `FuncRef`'s signature.
    ///
    /// * The returned function must be legalized.
    ///
    /// * The returned function must be valid (i.e. it must pass the CLIF
    ///   verifier).
    ///
    /// * The returned function is a correct and valid implementation of the
    ///   `callee` according to your language's semantics.
    ///
    /// Failure to uphold these invariants may result in panics during
    /// compilation or incorrect runtime behavior in the generated code.
    fn inline(
        &mut self,
        caller: &ir::Function,
        call_inst: ir::Inst,
        call_opcode: ir::Opcode,
        callee: ir::FuncRef,
        call_args: &[ir::Value],
    ) -> InlineCommand<'_>;
}

impl<'a, T> Inline for &'a mut T
where
    T: Inline,
{
    fn inline(
        &mut self,
        caller: &ir::Function,
        inst: ir::Inst,
        opcode: ir::Opcode,
        callee: ir::FuncRef,
        args: &[ir::Value],
    ) -> InlineCommand<'_> {
        (*self).inline(caller, inst, opcode, callee, args)
    }
}

/// Walk the given function, invoke the `Inline` implementation for each call
/// instruction, and inline the callee when directed to do so.
///
/// Returns whether any call was inlined.
pub(crate) fn do_inlining(
    func: &mut ir::Function,
    mut inliner: impl Inline,
) -> CodegenResult<bool> {
    trace!("function {} before inlining: {}", func.name, func);

    let mut inlined_any = false;
    let mut allocs = InliningAllocs::default();

    let mut cursor = FuncCursor::new(func);
    'block_loop: while let Some(block) = cursor.next_block() {
        // Always keep track of our previous cursor position. Assuming that the
        // current position is a function call that we will inline, then the
        // previous position is just before the inlined callee function. After
        // inlining a call, the Cranelift user can decide whether to consider
        // any function calls in the inlined callee for further inlining or
        // not. When they do, we back up to this previous cursor position so
        // that our traversal will then continue over the inlined body.
        let mut prev_pos;

        while let Some(inst) = {
            prev_pos = cursor.position();
            cursor.next_inst()
        } {
            // Make sure that `block` is always `inst`'s block, even with all of
            // our cursor-position-updating and block-splitting-during-inlining
            // shenanigans below.
            debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));

            match cursor.func.dfg.insts[inst] {
                ir::InstructionData::Call {
                    opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
                    args: _,
                    func_ref,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                None,
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::TryCall {
                    opcode: opcode @ ir::Opcode::TryCall,
                    args: _,
                    func_ref,
                    exception,
                } => {
                    trace!(
                        "considering call site for inlining: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                    let args = cursor.func.dfg.inst_args(inst);
                    match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
                        InlineCommand::KeepCall => {
                            trace!("  --> keeping call");
                        }
                        InlineCommand::Inline {
                            callee,
                            visit_callee,
                        } => {
                            let last_inlined_block = inline_one(
                                &mut allocs,
                                cursor.func,
                                func_ref,
                                block,
                                inst,
                                opcode,
                                &callee,
                                Some(exception),
                            );
                            inlined_any = true;
                            if visit_callee {
                                cursor.set_position(prev_pos);
                            } else {
                                // Arrange it so that the `next_block()` loop
                                // will continue to the next block that is not
                                // associated with the just-inlined callee.
                                cursor.goto_bottom(last_inlined_block);
                                continue 'block_loop;
                            }
                        }
                    }
                }
                ir::InstructionData::CallIndirect { .. }
                | ir::InstructionData::TryCallIndirect { .. } => {
                    // Can't inline indirect calls; an earlier pass must
                    // rewrite them into direct calls first, when possible.
                }
                _ => {
                    debug_assert!(
                        !cursor.func.dfg.insts[inst].opcode().is_call(),
                        "should have matched all call instructions, but found: {inst}: {}",
                        cursor.func.dfg.display_inst(inst),
                    );
                }
            }
        }
    }

    if inlined_any {
        trace!("function {} after inlining: {}", func.name, func);
    } else {
        trace!("function {} did not have any callees inlined", func.name);
    }

    Ok(inlined_any)
}

#[derive(Default)]
struct InliningAllocs {
    /// Map from callee value to inlined caller value.
    values: SecondaryMap<ir::Value, PackedOption<ir::Value>>,

    /// Map from callee constant to inlined caller constant.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    constants: SecondaryMap<ir::Constant, PackedOption<ir::Constant>>,

    /// Map from callee to inlined caller external name refs.
    ///
    /// Not in `EntityMap` because these are hash-consed inside the
    /// `ir::Function`.
    user_external_name_refs:
        SecondaryMap<ir::UserExternalNameRef, PackedOption<ir::UserExternalNameRef>>,

    /// The set of _caller_ inlined call instructions that need exception table
    /// fixups at the end of inlining.
    ///
    /// This includes every kind of call that returns to the caller, not just
    /// the literal `call` instruction: `call_indirect`, `try_call`,
    /// `try_call_indirect`, etc. It does not include `return_call` and
    /// `return_call_indirect` instructions, because the caller cannot catch
    /// exceptions that those calls throw: the caller is no longer on the stack
    /// by the time they execute.
    ///
    /// Note: this is a simple `Vec`, and not an `EntitySet`, because it is very
    /// sparse: most of the caller's instructions are not inlined call
    /// instructions. Additionally, we require deterministic iteration order and
    /// do not require set-membership testing, so a hash set is not a good
    /// choice either.
    calls_needing_exception_table_fixup: Vec<ir::Inst>,
}

impl InliningAllocs {
    fn reset(&mut self, callee: &ir::Function) {
        let InliningAllocs {
            values,
            constants,
            user_external_name_refs,
            calls_needing_exception_table_fixup,
        } = self;

        values.clear();
        values.resize(callee.dfg.len_values());

        constants.clear();
        constants.resize(callee.dfg.constants.len());

        user_external_name_refs.clear();
        user_external_name_refs.resize(callee.params.user_named_funcs().len());

        // Note: We do not reserve capacity for
        // `calls_needing_exception_table_fixup` because it is a sparse set and
        // we don't know how large it needs to be ahead of time.
        calls_needing_exception_table_fixup.clear();
    }

    fn set_inlined_value(
        &mut self,
        callee: &ir::Function,
        callee_val: ir::Value,
        inlined_val: ir::Value,
    ) {
        trace!("  --> callee {callee_val:?} = inlined {inlined_val:?}");
        debug_assert!(self.values[callee_val].is_none());
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        debug_assert!(self.values[resolved_callee_val].is_none());
        self.values[resolved_callee_val] = Some(inlined_val).into();
    }

    fn get_inlined_value(&self, callee: &ir::Function, callee_val: ir::Value) -> Option<ir::Value> {
        let resolved_callee_val = callee.dfg.resolve_aliases(callee_val);
        self.values[resolved_callee_val].expand()
    }
}

/// Inline one particular function call.
///
/// Returns the last inlined block in the layout.
fn inline_one(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee_func_ref: ir::FuncRef,
    call_block: ir::Block,
    call_inst: ir::Inst,
    call_opcode: ir::Opcode,
    callee: &ir::Function,
    call_exception_table: Option<ir::ExceptionTable>,
) -> ir::Block {
    trace!(
        "Inlining call {call_inst:?}: {}\n\
         with callee = {callee:?}",
        func.dfg.display_inst(call_inst)
    );

    // Type check the callee signature.
    let expected_callee_sig = func.dfg.ext_funcs[callee_func_ref].signature;
    let expected_callee_sig = &func.dfg.signatures[expected_callee_sig];
    assert_eq!(expected_callee_sig, &callee.signature);

    allocs.reset(callee);

    // First, append various callee entity arenas to the end of the caller's
    // entity arenas.
    let entity_map = create_entities(allocs, func, callee);

    // Inlined prologue: split the call instruction's block at the point of the
    // call and replace the call with a jump.
    let return_block = split_off_return_block(func, call_inst, call_opcode, callee);
    let call_stack_map = replace_call_with_jump(allocs, func, call_inst, callee, &entity_map);

    // Prepare for translating the actual instructions by inserting the inlined
    // blocks into the caller's layout in the same order that they appear in the
    // callee.
    let mut last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);

    // Translate each instruction from the callee into the caller,
    // appending them to their associated block in the caller.
    //
    // Note that we iterate over the callee with a pre-order traversal so that
    // we see value defs before uses.
    for callee_block in Dfs::new().pre_order_iter(callee) {
        let inlined_block = entity_map.inlined_block(callee_block);
        trace!(
            "Processing instructions in callee block {callee_block:?} (inlined block {inlined_block:?})"
        );

        let mut next_callee_inst = callee.layout.first_inst(callee_block);
        while let Some(callee_inst) = next_callee_inst {
            trace!(
                "Processing callee instruction {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            assert_ne!(
                callee.dfg.insts[callee_inst].opcode(),
                ir::Opcode::GlobalValue,
                "callee must already be legalized; we shouldn't see any `global_value` \
                 instructions when inlining; found {callee_inst:?}: {}",
                callee.dfg.display_inst(callee_inst)
            );

            // Remap the callee instruction's entities and insert it into the
            // caller's DFG.
            let inlined_inst_data = callee.dfg.insts[callee_inst].map(InliningInstRemapper {
                allocs: &allocs,
                func,
                callee,
                entity_map: &entity_map,
            });
            let inlined_inst = func.dfg.make_inst(inlined_inst_data);
            func.layout.append_inst(inlined_inst, inlined_block);

            let opcode = callee.dfg.insts[callee_inst].opcode();
            if opcode.is_return() {
                // Instructions that return do not define any values, so we
                // don't need to worry about that, but we do need to fix them up
                // so that they return by jumping to our control-flow join
                // block, rather than returning from the caller.
                if let Some(return_block) = return_block {
                    fixup_inst_that_returns(
                        allocs,
                        func,
                        callee,
                        &entity_map,
                        call_opcode,
                        inlined_inst,
                        callee_inst,
                        return_block,
                        call_stack_map.as_deref(),
                    );
                } else {
                    // If we are inlining a callee that was invoked via
                    // `return_call`, we leave inlined return instructions
                    // as-is: there is no logical caller frame on the stack to
                    // continue to.
                    debug_assert_eq!(call_opcode, ir::Opcode::ReturnCall);
                }
            } else {
                // Make the instruction's result values.
                let ctrl_typevar = callee.dfg.ctrl_typevar(callee_inst);
                func.dfg.make_inst_results(inlined_inst, ctrl_typevar);

                // Update the value map for this instruction's defs.
                let callee_results = callee.dfg.inst_results(callee_inst);
                let inlined_results = func.dfg.inst_results(inlined_inst);
                debug_assert_eq!(callee_results.len(), inlined_results.len());
                for (callee_val, inlined_val) in callee_results.iter().zip(inlined_results) {
                    allocs.set_inlined_value(callee, *callee_val, *inlined_val);
                }

                if opcode.is_call() {
                    append_stack_map_entries(
                        func,
                        callee,
                        &entity_map,
                        call_stack_map.as_deref(),
                        inlined_inst,
                        callee_inst,
                    );

                    // When we are inlining a `try_call` call site, we need to merge
                    // the call site's exception table into the inlined calls'
                    // exception tables. This can involve rewriting regular `call`s
                    // into `try_call`s, which requires mutating the CFG because
                    // `try_call` is a block terminator. However, we can't mutate
                    // the CFG in the middle of this traversal because we rely on
                    // the existence of a one-to-one mapping between the callee
                    // layout and the inlined layout. Instead, we record the set of
                    // inlined call instructions that will need fixing up, and
                    // perform that possibly-CFG-mutating exception table merging in
                    // a follow-up pass, when we no longer rely on that one-to-one
                    // layout mapping.
                    debug_assert_eq!(
                        call_opcode == ir::Opcode::TryCall,
                        call_exception_table.is_some()
                    );
                    if call_opcode == ir::Opcode::TryCall {
                        allocs
                            .calls_needing_exception_table_fixup
                            .push(inlined_inst);
                    }
                }
            }

            trace!(
                "  --> inserted inlined instruction {inlined_inst:?}: {}",
                func.dfg.display_inst(inlined_inst)
            );

            next_callee_inst = callee.layout.next_inst(callee_inst);
        }
    }

    // We copied *all* callee blocks into the caller's layout, but only copied
    // the callee instructions in *reachable* callee blocks into the caller's
    // associated blocks. Therefore, any *unreachable* blocks are empty in the
    // caller, which is invalid CLIF because all blocks must end in a
    // terminator, so do a quick pass over the inlined blocks and remove any
    // empty blocks from the caller's layout.
    for block in entity_map.iter_inlined_blocks(func) {
        if func.layout.is_block_inserted(block) && func.layout.first_inst(block).is_none() {
            log::trace!("removing unreachable inlined block from layout: {block}");

            // If the block being removed is our last-inlined block, then back
            // it up to the previous block in the layout, which will be the new
            // last-inlined block after this one's removal.
            if block == last_inlined_block {
                last_inlined_block = func.layout.prev_block(last_inlined_block).expect(
                    "there will always be at least the block that contained the call we are \
                     inlining",
                );
            }

            func.layout.remove_block(block);
        }
    }

    // Final step: fix up the exception tables of any inlined calls when we are
    // inlining a `try_call` site.
    //
    // Subtly, this requires rewriting non-catching `call[_indirect]`
    // instructions into `try_call[_indirect]` instructions so that exceptions
    // that unwound through the original callee frame and were caught by the
    // caller's `try_call` do not unwind past this inlined frame. And turning a
    // `call` into a `try_call` mutates the CFG, breaking our one-to-one mapping
    // between callee blocks and inlined blocks, so we delay these fixups to
    // this final step, when we no longer rely on that mapping.
    debug_assert!(
        allocs.calls_needing_exception_table_fixup.is_empty() || call_exception_table.is_some()
    );
    debug_assert_eq!(
        call_opcode == ir::Opcode::TryCall,
        call_exception_table.is_some()
    );
    if let Some(call_exception_table) = call_exception_table {
        fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
    }

    debug_assert!(
        func.layout.is_block_inserted(last_inlined_block),
        "last_inlined_block={last_inlined_block} should be inserted in the layout"
    );
    last_inlined_block
}

/// Append stack map entries from the caller and callee to the given inlined
/// instruction.
fn append_stack_map_entries(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
) {
    // Add the caller's stack map to this call. These entries already refer to
    // caller entities and do not need further translation.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        call_stack_map
            .iter()
            .flat_map(|entries| entries.iter().cloned()),
    );

    // Append the callee's stack map to this call. These entries refer to
    // callee entities and therefore do require translation into the caller's
    // index space.
    func.dfg.append_user_stack_map_entries(
        inlined_inst,
        callee
            .dfg
            .user_stack_map_entries(callee_inst)
            .iter()
            .flat_map(|entries| entries.iter())
            .map(|entry| ir::UserStackMapEntry {
                ty: entry.ty,
                slot: entity_map.inlined_stack_slot(entry.slot),
                offset: entry.offset,
            }),
    );
}

/// Create or update the exception tables for any inlined call instructions:
/// when inlining at a `try_call` site, we must forward our exceptional edges
/// into each inlined call instruction.
fn fixup_inlined_call_exception_tables(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_exception_table: ir::ExceptionTable,
) {
    // Split a block at a `call[_indirect]` instruction, detach the
    // instruction's results, and alias them to the new block's parameters.
    let split_block_for_new_try_call = |func: &mut ir::Function, inst: ir::Inst| -> ir::Block {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_terminator());

        // Split the block.
        let next_inst = func
            .layout
            .next_inst(inst)
            .expect("inst is not a terminator, should have a successor");
        let new_block = func.dfg.blocks.add();
        func.layout.split_block(new_block, next_inst);

        // `try_call[_indirect]` instructions do not define values themselves;
        // the normal-return block has parameters for the results. So remove
        // this instruction's results, create an associated block parameter for
        // each of them, and alias them to the new block parameters.
        let old_results = SmallValueVec::from_iter(func.dfg.inst_results(inst).iter().copied());
        func.dfg.detach_inst_results(inst);
        for old_result in old_results {
            let ty = func.dfg.value_type(old_result);
            let new_block_param = func.dfg.append_block_param(new_block, ty);
            func.dfg.change_to_alias(old_result, new_block_param);
        }

        new_block
    };

    // Clone the caller's exception table, updating it for use in the current
    // `call[_indirect]` instruction as it becomes a `try_call[_indirect]`.
    let clone_exception_table_for_this_call = |func: &mut ir::Function,
                                               signature: ir::SigRef,
                                               new_block: ir::Block|
     -> ir::ExceptionTable {
        let mut exception = func.stencil.dfg.exception_tables[call_exception_table]
            .deep_clone(&mut func.stencil.dfg.value_lists);

        *exception.signature_mut() = signature;

        let returns_len = func.dfg.signatures[signature].returns.len();
        let returns_len = u32::try_from(returns_len).unwrap();

        *exception.normal_return_mut() = ir::BlockCall::new(
            new_block,
            (0..returns_len).map(ir::BlockArg::TryCallRet),
            &mut func.dfg.value_lists,
        );

        func.dfg.exception_tables.push(exception)
    };

    for inst in allocs.calls_needing_exception_table_fixup.drain(..) {
        debug_assert!(func.dfg.insts[inst].opcode().is_call());
        debug_assert!(!func.dfg.insts[inst].opcode().is_return());
        match func.dfg.insts[inst] {
            //     current_block:
            //         preds...
            //         rets... = call f(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call f(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let signature = func.dfg.ext_funcs[func_ref].signature;
                let exception = clone_exception_table_for_this_call(func, signature, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCall {
                    opcode: ir::Opcode::TryCall,
                    args,
                    func_ref,
                    exception,
                };
            }

            //     current_block:
            //         preds...
            //         rets... = call_indirect sig, val(args...)
            //         succs...
            //
            // becomes
            //
            //     current_block:
            //         preds...
            //         try_call_indirect sig, val(args...), new_block(rets...), [call_exception_table...]
            //     new_block(rets...):
            //         succs...
            ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            } => {
                let new_block = split_block_for_new_try_call(func, inst);
                let exception = clone_exception_table_for_this_call(func, sig_ref, new_block);
                func.dfg.insts[inst] = ir::InstructionData::TryCallIndirect {
                    opcode: ir::Opcode::TryCallIndirect,
                    args,
                    exception,
                };
            }

            // For `try_call[_indirect]` instructions, we just need to merge the
            // exception tables.
            ir::InstructionData::TryCall {
                opcode: ir::Opcode::TryCall,
                exception,
                ..
            }
            | ir::InstructionData::TryCallIndirect {
                opcode: ir::Opcode::TryCallIndirect,
                exception,
                ..
            } => {
                // Construct a new exception table that consists of the inlined
                // instruction's exception table match sequence, with the
                // inlining site's exception table appended. This ensures that
                // the first-match semantics emulate the original behavior of
                // matching in the inner frame first.
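                //
                // For example, with hypothetical tags and blocks: merging an
                // inlined table `[tag1: block_a]` with a call site's table
                // `[tag1: block_b, default: block_c]` yields
                // `[tag1: block_a, tag1: block_b, default: block_c]`, so a
                // raised `tag1` still reaches the inner frame's handler first.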
                let sig = func.dfg.exception_tables[exception].signature();
                let normal_return = *func.dfg.exception_tables[exception].normal_return();
                let exception_data = ExceptionTableData::new(
                    sig,
                    normal_return,
                    func.dfg.exception_tables[exception]
                        .items()
                        .chain(func.dfg.exception_tables[call_exception_table].items()),
                )
                .deep_clone(&mut func.dfg.value_lists);

                func.dfg.exception_tables[exception] = exception_data;
            }

            otherwise => unreachable!("unknown non-return call instruction: {otherwise:?}"),
        }
    }
}

/// After having created an inlined version of a callee instruction that returns
/// in the caller, we need to fix it up so that it doesn't actually return
/// (since we are already in the caller's frame) and instead just jumps to the
/// control-flow join point.
fn fixup_inst_that_returns(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
    call_opcode: ir::Opcode,
    inlined_inst: ir::Inst,
    callee_inst: ir::Inst,
    return_block: ir::Block,
    call_stack_map: Option<&[ir::UserStackMapEntry]>,
) {
    debug_assert!(func.dfg.insts[inlined_inst].opcode().is_return());
    match func.dfg.insts[inlined_inst] {
        //     return rets...
        //
        // becomes
        //
        //     jump return_block(rets...)
        ir::InstructionData::MultiAry {
            opcode: ir::Opcode::Return,
            args,
        } => {
            let rets = SmallBlockArgVec::from_iter(
                args.as_slice(&func.dfg.value_lists)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            func.dfg.replace(inlined_inst).jump(return_block, &rets);
        }

        //     return_call f(args...)
        //
        // becomes
        //
        //     rets... = call f(args...)
        //     jump return_block(rets...)
        ir::InstructionData::Call {
            opcode: ir::Opcode::ReturnCall,
            args,
            func_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::Call {
                opcode: ir::Opcode::Call,
                args,
                func_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        //     return_call_indirect sig, val(args...)
        //
        // becomes
        //
        //     rets... = call_indirect sig, val(args...)
        //     jump return_block(rets...)
        ir::InstructionData::CallIndirect {
            opcode: ir::Opcode::ReturnCallIndirect,
            args,
            sig_ref,
        } => {
            func.dfg.insts[inlined_inst] = ir::InstructionData::CallIndirect {
                opcode: ir::Opcode::CallIndirect,
                args,
                sig_ref,
            };
            func.dfg.make_inst_results(inlined_inst, ir::types::INVALID);

            append_stack_map_entries(
                func,
                callee,
                &entity_map,
                call_stack_map,
                inlined_inst,
                callee_inst,
            );

            let rets = SmallBlockArgVec::from_iter(
                func.dfg
                    .inst_results(inlined_inst)
                    .iter()
                    .copied()
                    .map(|v| v.into()),
            );
            let mut cursor = FuncCursor::new(func);
            cursor.goto_after_inst(inlined_inst);
            cursor.ins().jump(return_block, &rets);

            if call_opcode == ir::Opcode::TryCall {
                allocs
                    .calls_needing_exception_table_fixup
                    .push(inlined_inst);
            }
        }

        inst_data => unreachable!(
            "should have handled all `is_return() == true` instructions above; \
             got {inst_data:?}"
        ),
    }
}

/// An `InstructionMapper` implementation that remaps a callee instruction's
/// entity references to their new indices in the caller function.
struct InliningInstRemapper<'a> {
    allocs: &'a InliningAllocs,
    func: &'a mut ir::Function,
    callee: &'a ir::Function,
    entity_map: &'a EntityMap,
}

impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
    fn map_value(&mut self, value: ir::Value) -> ir::Value {
        self.allocs.get_inlined_value(self.callee, value).expect(
            "defs come before uses; we should have already inlined all values \
             used by an instruction",
        )
    }

    fn map_value_list(&mut self, value_list: ir::ValueList) -> ir::ValueList {
        let mut inlined_list = ir::ValueList::new();
        for callee_val in value_list.as_slice(&self.callee.dfg.value_lists) {
            let inlined_val = self.map_value(*callee_val);
            inlined_list.push(inlined_val, &mut self.func.dfg.value_lists);
        }
        inlined_list
    }

    fn map_global_value(&mut self, global_value: ir::GlobalValue) -> ir::GlobalValue {
        self.entity_map.inlined_global_value(global_value)
    }

    fn map_jump_table(&mut self, jump_table: ir::JumpTable) -> ir::JumpTable {
        let inlined_default =
            self.map_block_call(self.callee.dfg.jump_tables[jump_table].default_block());
        let inlined_table = self.callee.dfg.jump_tables[jump_table]
            .as_slice()
            .iter()
            .map(|callee_block_call| self.map_block_call(*callee_block_call))
            .collect::<SmallBlockCallVec>();
        self.func
            .dfg
            .jump_tables
            .push(ir::JumpTableData::new(inlined_default, &inlined_table))
    }

    fn map_exception_table(&mut self, exception_table: ir::ExceptionTable) -> ir::ExceptionTable {
        let exception_table = &self.callee.dfg.exception_tables[exception_table];
        let inlined_sig_ref = self.map_sig_ref(exception_table.signature());
        let inlined_normal_return = self.map_block_call(*exception_table.normal_return());
        let inlined_table = exception_table
            .items()
            .map(|item| match item {
                ExceptionTableItem::Tag(tag, block_call) => {
                    ExceptionTableItem::Tag(tag, self.map_block_call(block_call))
                }
                ExceptionTableItem::Default(block_call) => {
                    ExceptionTableItem::Default(self.map_block_call(block_call))
                }
                ExceptionTableItem::Context(value) => {
                    ExceptionTableItem::Context(self.map_value(value))
                }
            })
            .collect::<SmallVec<[_; 8]>>();
        self.func
            .dfg
            .exception_tables
            .push(ir::ExceptionTableData::new(
                inlined_sig_ref,
                inlined_normal_return,
                inlined_table,
            ))
    }

    fn map_block_call(&mut self, block_call: ir::BlockCall) -> ir::BlockCall {
        let callee_block = block_call.block(&self.callee.dfg.value_lists);
        let inlined_block = self.entity_map.inlined_block(callee_block);
        let args = block_call
            .args(&self.callee.dfg.value_lists)
            .map(|arg| match arg {
                ir::BlockArg::Value(value) => self.map_value(value).into(),
                ir::BlockArg::TryCallRet(_) | ir::BlockArg::TryCallExn(_) => arg,
            })
            .collect::<SmallBlockArgVec>();
        ir::BlockCall::new(inlined_block, args, &mut self.func.dfg.value_lists)
    }

    fn map_func_ref(&mut self, func_ref: ir::FuncRef) -> ir::FuncRef {
        self.entity_map.inlined_func_ref(func_ref)
    }

    fn map_sig_ref(&mut self, sig_ref: ir::SigRef) -> ir::SigRef {
        self.entity_map.inlined_sig_ref(sig_ref)
    }

    fn map_stack_slot(&mut self, stack_slot: ir::StackSlot) -> ir::StackSlot {
        self.entity_map.inlined_stack_slot(stack_slot)
    }

    fn map_dynamic_stack_slot(
        &mut self,
        dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        self.entity_map
            .inlined_dynamic_stack_slot(dynamic_stack_slot)
    }

    fn map_constant(&mut self, constant: ir::Constant) -> ir::Constant {
        self.allocs
            .constants
            .get(constant)
            .and_then(|o| o.expand())
            .expect("should have inlined all callee constants")
    }

    fn map_immediate(&mut self, immediate: ir::Immediate) -> ir::Immediate {
        self.entity_map.inlined_immediate(immediate)
    }
}

/// Inline the callee's layout into the caller's layout.
///
/// Returns the last inlined block in the layout.
fn inline_block_layout(
    func: &mut ir::Function,
    call_block: ir::Block,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> ir::Block {
    debug_assert!(func.layout.is_block_inserted(call_block));

    // Iterate over callee blocks in layout order, inserting their associated
    // inlined blocks into the caller's layout.
    let mut prev_inlined_block = call_block;
    let mut next_callee_block = callee.layout.entry_block();
    while let Some(callee_block) = next_callee_block {
        debug_assert!(func.layout.is_block_inserted(prev_inlined_block));

        let inlined_block = entity_map.inlined_block(callee_block);
        func.layout
            .insert_block_after(inlined_block, prev_inlined_block);

        prev_inlined_block = inlined_block;
        next_callee_block = callee.layout.next_block(callee_block);
    }

    debug_assert!(func.layout.is_block_inserted(prev_inlined_block));
    prev_inlined_block
}

/// Split the call instruction's block just after the call instruction to create
/// the point where control flow joins after the inlined callee "returns".
///
/// Note that tail calls do not return to the caller and therefore do not have a
/// control-flow join point.
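///
/// For the common, non-terminator case, a sketch of the transformation
/// (hypothetical values and blocks, for illustration only):
///
///     block0:
///         v2 = call fn0(v0)
///         v3 = iadd v2, v1
///
/// becomes
///
///     block0:
///         v2 = call fn0(v0)  ;; later replaced with a jump
///     block9(v4: i32):       ;; the split-off return block
///         v3 = iadd v2, v1   ;; v2 is now an alias of v4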
fn split_off_return_block(
    func: &mut ir::Function,
    call_inst: ir::Inst,
    opcode: ir::Opcode,
    callee: &ir::Function,
) -> Option<ir::Block> {
    // When the `call_inst` is not a block terminator, we need to split the
    // block.
    let return_block = func.layout.next_inst(call_inst).map(|next_inst| {
        let return_block = func.dfg.blocks.add();
        func.layout.split_block(return_block, next_inst);

        // Add block parameters for each return value and alias the call
        // instruction's results to them.
        let old_results =
            SmallValueVec::from_iter(func.dfg.inst_results(call_inst).iter().copied());
        debug_assert_eq!(old_results.len(), callee.signature.returns.len());
        func.dfg.detach_inst_results(call_inst);
        for (abi, old_val) in callee.signature.returns.iter().zip(old_results) {
            debug_assert_eq!(abi.value_type, func.dfg.value_type(old_val));
            let ret_param = func.dfg.append_block_param(return_block, abi.value_type);
            func.dfg.change_to_alias(old_val, ret_param);
        }

        return_block
    });

    // When the `call_inst` is a block terminator, then it is either a
    // `return_call` or a `try_call`:
    //
    // * For `return_call`s, we don't have a control-flow join point, because
    //   the caller permanently transfers control to the callee.
    //
    // * For `try_call`s, we probably already have a block for the control-flow
    //   join point, but it isn't guaranteed: the `try_call` might ignore the
    //   call's returns and not forward them to the normal-return block, or it
    //   might also pass additional arguments. We can only reuse the existing
    //   normal-return block when the `try_call` forwards exactly our callee's
    //   returns to that block (and therefore that block's parameter types also
    //   exactly match the callee's return types). Otherwise, we must create a new
    //   return block that forwards to the existing normal-return
    //   block. (Elsewhere, at the end of inlining, we will also update any inlined
    //   calls to forward any raised exceptions to the caller's exception table,
    //   as necessary.)
    //
    //   Finally, note that reusing the normal-return's target block is just an
    //   optimization to emit a simpler CFG when we can, and is not
    //   fundamentally required for correctness. We could always insert a
    //   temporary block as our control-flow join point that then forwards to
    //   the normal-return's target block. However, at the time of writing,
    //   Cranelift doesn't do any jump-threading or branch simplification in
    //   the mid-end, and removing unnecessary blocks in this way can help some
    //   subsequent mid-end optimizations. If, in the future, we gain support
    //   for jump-threading optimizations in the mid-end, we can come back and
    //   simplify the code below to always generate the temporary block, and
    //   then rely on the subsequent optimizations to clean everything up.
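    //
    //   For example (hypothetical): a `try_call fn0(v0), block5(ret0), [...]`
    //   forwards exactly the callee's returns, so `block5` can serve as the
    //   control-flow join point directly; a `try_call fn0(v0), block5(ret0, v9), [...]`
    //   cannot, so we synthesize a fresh return block that jumps to `block5`.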
    debug_assert_eq!(
        return_block.is_none(),
        opcode == ir::Opcode::ReturnCall || opcode == ir::Opcode::TryCall,
    );
    return_block.or_else(|| match func.dfg.insts[call_inst] {
        ir::InstructionData::TryCall {
            opcode: ir::Opcode::TryCall,
            args: _,
            func_ref: _,
            exception,
        } => {
            let normal_return = func.dfg.exception_tables[exception].normal_return();
            let normal_return_block = normal_return.block(&func.dfg.value_lists);

            // Check to see if we can reuse the existing normal-return block.
            {
                let normal_return_args = normal_return.args(&func.dfg.value_lists);
                if normal_return_args.len() == callee.signature.returns.len()
                    && normal_return_args.enumerate().all(|(i, arg)| {
                        let i = u32::try_from(i).unwrap();
                        arg == ir::BlockArg::TryCallRet(i)
                    })
                {
                    return Some(normal_return_block);
                }
            }

            // Okay, we cannot reuse the normal-return block. Create a new block
            // that has the expected block parameter types and have it jump to
            // the normal-return block.
            let return_block = func.dfg.blocks.add();
            func.layout.insert_block(return_block, normal_return_block);

            let return_block_params = callee
                .signature
                .returns
                .iter()
                .map(|abi| func.dfg.append_block_param(return_block, abi.value_type))
                .collect::<SmallValueVec>();

            let normal_return_args = func.dfg.exception_tables[exception]
                .normal_return()
                .args(&func.dfg.value_lists)
                .collect::<SmallBlockArgVec>();
            let jump_args = normal_return_args
                .into_iter()
                .map(|arg| match arg {
                    ir::BlockArg::Value(value) => ir::BlockArg::Value(value),
                    ir::BlockArg::TryCallRet(i) => {
                        let i = usize::try_from(i).unwrap();
                        ir::BlockArg::Value(return_block_params[i])
                    }
                    ir::BlockArg::TryCallExn(_) => {
                        unreachable!("normal-return edges cannot use exceptional results")
                    }
                })
                .collect::<SmallBlockArgVec>();

            let mut cursor = FuncCursor::new(func);
            cursor.goto_first_insertion_point(return_block);
            cursor.ins().jump(normal_return_block, &jump_args);

            Some(return_block)
        }
        _ => None,
    })
}

/// Replace the caller's call instruction with a jump to the caller's inlined
/// copy of the callee's entry block.
///
/// Also associates the callee's parameters with the caller's arguments in our
/// value map.
///
/// Returns the caller's stack map entries, if any.
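///
/// A sketch (hypothetical values and blocks): a call `v2 = call fn0(v0)`
/// becomes `jump block7`, where `block7` is the inlined copy of the callee's
/// entry block; the callee's entry-block parameter is mapped directly to
/// `v0`, so the jump needs no block arguments.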
fn replace_call_with_jump(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    call_inst: ir::Inst,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> Option<ir::UserStackMapEntryVec> {
    trace!("Replacing `call` with `jump`");
    trace!(
        "  --> call instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    let callee_entry_block = callee
        .layout
        .entry_block()
        .expect("callee function should have an entry block");
    let callee_param_values = callee.dfg.block_params(callee_entry_block);
    let caller_arg_values = SmallValueVec::from_iter(func.dfg.inst_args(call_inst).iter().copied());
    debug_assert_eq!(callee_param_values.len(), caller_arg_values.len());
    debug_assert_eq!(callee_param_values.len(), callee.signature.params.len());
    for (abi, (callee_param_value, caller_arg_value)) in callee
        .signature
        .params
        .iter()
        .zip(callee_param_values.into_iter().zip(caller_arg_values))
    {
        debug_assert_eq!(abi.value_type, callee.dfg.value_type(*callee_param_value));
        debug_assert_eq!(abi.value_type, func.dfg.value_type(caller_arg_value));
        allocs.set_inlined_value(callee, *callee_param_value, caller_arg_value);
    }

    // Replace the caller's call instruction with a jump to the caller's inlined
    // copy of the callee's entry block.
    //
    // Note that the call block dominates the inlined entry block (and also all
    // other inlined blocks) so we can reference the arguments directly, and do
    // not need to add block parameters to the inlined entry block.
    let inlined_entry_block = entity_map.inlined_block(callee_entry_block);
    func.dfg.replace(call_inst).jump(inlined_entry_block, &[]);
    trace!(
        "  --> replaced with jump instruction: {call_inst:?}: {}",
        func.dfg.display_inst(call_inst)
    );

    func.dfg.take_user_stack_map_entries(call_inst)
}

/// Keeps track of mapping callee entities to their associated inlined caller
/// entities.
#[derive(Default)]
struct EntityMap {
    // Rather than doing an implicit, demand-based, DCE'ing translation of
    // entities, which would require maps from each callee entity to its
    // associated caller entity, we copy all entities into the caller and
    // remember each entity space's initial offset; mapping from a callee
    // entity to its inlined caller entity is then just adding that initial
    // offset to the callee's index. This should be both faster and simpler
    // than the alternative. Most of these sets are relatively small, and they
    // rarely have much dead code in practice, so this is a good trade-off.
    //
    // Note that there are a few kinds of entities that are excluded from the
    // `EntityMap`, and for which we do actually take the demand-based approach:
    // values and value lists being the notable ones.
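    //
    // For example, with hypothetical indices: if the caller has 10 blocks
    // when inlining begins, then `block_offset` is `Some(10)` and callee
    // `block3` maps to inlined `block13`.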
    block_offset: Option<u32>,
    global_value_offset: Option<u32>,
    sig_ref_offset: Option<u32>,
    func_ref_offset: Option<u32>,
    stack_slot_offset: Option<u32>,
    dynamic_type_offset: Option<u32>,
    dynamic_stack_slot_offset: Option<u32>,
    immediate_offset: Option<u32>,
}

impl EntityMap {
    fn inlined_block(&self, callee_block: ir::Block) -> ir::Block {
        let offset = self
            .block_offset
            .expect("must create inlined `ir::Block`s before calling `EntityMap::inlined_block`");
        ir::Block::from_u32(offset + callee_block.as_u32())
    }

    fn iter_inlined_blocks(&self, func: &ir::Function) -> impl Iterator<Item = ir::Block> + use<> {
        let start = self.block_offset.expect(
            "must create inlined `ir::Block`s before calling `EntityMap::iter_inlined_blocks`",
        );

        let end = func.dfg.blocks.len();
        let end = u32::try_from(end).unwrap();

        (start..end).map(ir::Block::from_u32)
    }

    fn inlined_global_value(&self, callee_global_value: ir::GlobalValue) -> ir::GlobalValue {
        let offset = self
            .global_value_offset
            .expect("must create inlined `ir::GlobalValue`s before calling `EntityMap::inlined_global_value`");
        ir::GlobalValue::from_u32(offset + callee_global_value.as_u32())
    }

    fn inlined_sig_ref(&self, callee_sig_ref: ir::SigRef) -> ir::SigRef {
        let offset = self.sig_ref_offset.expect(
            "must create inlined `ir::SigRef`s before calling `EntityMap::inlined_sig_ref`",
        );
        ir::SigRef::from_u32(offset + callee_sig_ref.as_u32())
    }

    fn inlined_func_ref(&self, callee_func_ref: ir::FuncRef) -> ir::FuncRef {
        let offset = self.func_ref_offset.expect(
            "must create inlined `ir::FuncRef`s before calling `EntityMap::inlined_func_ref`",
        );
        ir::FuncRef::from_u32(offset + callee_func_ref.as_u32())
    }

    fn inlined_stack_slot(&self, callee_stack_slot: ir::StackSlot) -> ir::StackSlot {
        let offset = self.stack_slot_offset.expect(
            "must create inlined `ir::StackSlot`s before calling `EntityMap::inlined_stack_slot`",
        );
        ir::StackSlot::from_u32(offset + callee_stack_slot.as_u32())
    }

    fn inlined_dynamic_type(&self, callee_dynamic_type: ir::DynamicType) -> ir::DynamicType {
        let offset = self.dynamic_type_offset.expect(
            "must create inlined `ir::DynamicType`s before calling `EntityMap::inlined_dynamic_type`",
        );
        ir::DynamicType::from_u32(offset + callee_dynamic_type.as_u32())
    }

    fn inlined_dynamic_stack_slot(
        &self,
        callee_dynamic_stack_slot: ir::DynamicStackSlot,
    ) -> ir::DynamicStackSlot {
        let offset = self.dynamic_stack_slot_offset.expect(
            "must create inlined `ir::DynamicStackSlot`s before calling `EntityMap::inlined_dynamic_stack_slot`",
        );
        ir::DynamicStackSlot::from_u32(offset + callee_dynamic_stack_slot.as_u32())
    }

    fn inlined_immediate(&self, callee_immediate: ir::Immediate) -> ir::Immediate {
        let offset = self.immediate_offset.expect(
            "must create inlined `ir::Immediate`s before calling `EntityMap::inlined_immediate`",
        );
        ir::Immediate::from_u32(offset + callee_immediate.as_u32())
    }
}

1296/// Translate all of the callee's various entities into the caller, producing an
1297/// `EntityMap` that can be used to translate callee entity references into
1298/// inlined caller entity references.
1299fn create_entities(
1300    allocs: &mut InliningAllocs,
1301    func: &mut ir::Function,
1302    callee: &ir::Function,
1303) -> EntityMap {
1304    let mut entity_map = EntityMap::default();
1305
1306    entity_map.block_offset = Some(create_blocks(allocs, func, callee));
1307    entity_map.global_value_offset = Some(create_global_values(func, callee));
1308    entity_map.sig_ref_offset = Some(create_sig_refs(func, callee));
1309    create_user_external_name_refs(allocs, func, callee);
1310    entity_map.func_ref_offset = Some(create_func_refs(allocs, func, callee, &entity_map));
1311    entity_map.stack_slot_offset = Some(create_stack_slots(func, callee));
1312    entity_map.dynamic_type_offset = Some(create_dynamic_types(func, callee, &entity_map));
1313    entity_map.dynamic_stack_slot_offset =
1314        Some(create_dynamic_stack_slots(func, callee, &entity_map));
1315    entity_map.immediate_offset = Some(create_immediates(func, callee));
1316
1317    // `ir::ConstantData` is deduplicated, so we cannot use our offset scheme
1318    // for `ir::Constant`s. Nonetheless, we still insert them into the caller
1319    // now, at the same time as the rest of our entities.
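    // (Because the constant pool's `insert` deduplicates by data, a callee
    // constant whose data already exists in the caller maps to the caller's
    // pre-existing handle rather than to a freshly appended one, so inlined
    // constant indices need not be contiguous.)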
    create_constants(allocs, func, callee);

    entity_map
}

/// Create inlined blocks in the caller for every block in the callee.
fn create_blocks(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
) -> u32 {
    let offset = func.dfg.blocks.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.blocks.reserve(callee.dfg.blocks.len());
    for callee_block in callee.dfg.blocks.iter() {
        let caller_block = func.dfg.blocks.add();
        trace!("Callee {callee_block:?} = inlined {caller_block:?}");

        if callee.layout.is_cold(callee_block) {
            func.layout.set_cold(caller_block);
        }

        // Note: the entry block does not need parameters because its only
        // predecessor is the block containing the call site, and we associate
        // the callee's parameters directly with the call's arguments.
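        // For example (illustrative values): when inlining a call like
        // `v9 = call fn2(v4, v5)`, the callee's entry-block parameters are
        // associated with `v4` and `v5` directly, so the inlined entry block
        // is created without any block parameters of its own.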
        if callee.layout.entry_block() != Some(callee_block) {
            for callee_param in callee.dfg.blocks[callee_block].params(&callee.dfg.value_lists) {
                let ty = callee.dfg.value_type(*callee_param);
                let caller_param = func.dfg.append_block_param(caller_block, ty);

                allocs.set_inlined_value(callee, *callee_param, caller_param);
            }
        }
    }

    offset
}

/// Copy and translate global values from the callee into the caller.
fn create_global_values(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let gv_offset = func.global_values.len();
    let gv_offset = u32::try_from(gv_offset).unwrap();

    func.global_values.reserve(callee.global_values.len());
    for gv in callee.global_values.values() {
        func.global_values.push(match gv {
            // These kinds of global values reference other global values, so
            // we need to fix up those references.
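            // E.g. (illustrative indices): a callee `gv1` defined as a load
            // from `gv0` becomes, in the caller, the global value at index
            // `gv_offset + 1`, loading from the one at index `gv_offset`.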
            ir::GlobalValueData::Load {
                base,
                offset,
                global_type,
                flags,
            } => ir::GlobalValueData::Load {
                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
                offset: *offset,
                global_type: *global_type,
                flags: *flags,
            },
            ir::GlobalValueData::IAddImm {
                base,
                offset,
                global_type,
            } => ir::GlobalValueData::IAddImm {
                base: ir::GlobalValue::from_u32(base.as_u32() + gv_offset),
                offset: *offset,
                global_type: *global_type,
            },

            // These kinds of global values do not reference other global
            // values, so we can just clone them.
            ir::GlobalValueData::VMContext
            | ir::GlobalValueData::Symbol { .. }
            | ir::GlobalValueData::DynScaleTargetConst { .. } => gv.clone(),
        });
    }

    gv_offset
}

/// Copy `ir::SigRef`s from the callee into the caller.
fn create_sig_refs(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.dfg.signatures.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.signatures.reserve(callee.dfg.signatures.len());
    for sig in callee.dfg.signatures.values() {
        func.dfg.signatures.push(sig.clone());
    }

    offset
}

/// Translate `ir::UserExternalNameRef`s from the callee into the caller.
fn create_user_external_name_refs(
    allocs: &mut InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
) {
    for (callee_named_func_ref, name) in callee.params.user_named_funcs().iter() {
        let caller_named_func_ref = func.declare_imported_user_function(name.clone());
        allocs.user_external_name_refs[callee_named_func_ref] = Some(caller_named_func_ref).into();
    }
}

/// Translate `ir::FuncRef`s from the callee into the caller.
fn create_func_refs(
    allocs: &InliningAllocs,
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dfg.ext_funcs.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.ext_funcs.reserve(callee.dfg.ext_funcs.len());
    for ir::ExtFuncData {
        name,
        signature,
        colocated,
    } in callee.dfg.ext_funcs.values()
    {
        func.dfg.ext_funcs.push(ir::ExtFuncData {
            name: match name {
                ir::ExternalName::User(name_ref) => {
                    ir::ExternalName::User(allocs.user_external_name_refs[*name_ref].expect(
                        "should have translated all `ir::UserExternalNameRef`s before translating \
                         `ir::FuncRef`s",
                    ))
                }
                ir::ExternalName::TestCase(_)
                | ir::ExternalName::LibCall(_)
                | ir::ExternalName::KnownSymbol(_) => name.clone(),
            },
            signature: entity_map.inlined_sig_ref(*signature),
            colocated: *colocated,
        });
    }

    offset
}

/// Copy stack slots from the callee into the caller.
fn create_stack_slots(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.sized_stack_slots.len();
    let offset = u32::try_from(offset).unwrap();

    func.sized_stack_slots
        .reserve(callee.sized_stack_slots.len());
    for slot in callee.sized_stack_slots.values() {
        func.sized_stack_slots.push(slot.clone());
    }

    offset
}

/// Copy dynamic types from the callee into the caller.
fn create_dynamic_types(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dfg.dynamic_types.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg
        .dynamic_types
        .reserve(callee.dfg.dynamic_types.len());
    for ir::DynamicTypeData {
        base_vector_ty,
        dynamic_scale,
    } in callee.dfg.dynamic_types.values()
    {
        func.dfg.dynamic_types.push(ir::DynamicTypeData {
            base_vector_ty: *base_vector_ty,
            dynamic_scale: entity_map.inlined_global_value(*dynamic_scale),
        });
    }

    offset
}

/// Copy dynamic stack slots from the callee into the caller.
fn create_dynamic_stack_slots(
    func: &mut ir::Function,
    callee: &ir::Function,
    entity_map: &EntityMap,
) -> u32 {
    let offset = func.dynamic_stack_slots.len();
    let offset = u32::try_from(offset).unwrap();

    func.dynamic_stack_slots
        .reserve(callee.dynamic_stack_slots.len());
    for ir::DynamicStackSlotData { kind, dyn_ty } in callee.dynamic_stack_slots.values() {
        func.dynamic_stack_slots.push(ir::DynamicStackSlotData {
            kind: *kind,
            dyn_ty: entity_map.inlined_dynamic_type(*dyn_ty),
        });
    }

    offset
}

/// Copy immediates from the callee into the caller.
fn create_immediates(func: &mut ir::Function, callee: &ir::Function) -> u32 {
    let offset = func.dfg.immediates.len();
    let offset = u32::try_from(offset).unwrap();

    func.dfg.immediates.reserve(callee.dfg.immediates.len());
    for imm in callee.dfg.immediates.values() {
        func.dfg.immediates.push(imm.clone());
    }

    offset
}

/// Copy constants from the callee into the caller.
fn create_constants(allocs: &mut InliningAllocs, func: &mut ir::Function, callee: &ir::Function) {
    for (callee_constant, data) in callee.dfg.constants.iter() {
        let inlined_constant = func.dfg.constants.insert(data.clone());
        allocs.constants[*callee_constant] = Some(inlined_constant).into();
    }
}