wasmtime_cranelift/
obj.rs

1//! Object file builder.
2//!
3//! Creates ELF image based on `Compilation` information. The ELF contains
4//! functions and trampolines in the ".text" section. It also contains all
5//! relocation records for the linking stage. If DWARF sections exist, their
6//! content will be written as well.
7//!
8//! The object file has symbols for each function and trampoline, as well as
9//! symbols that refer to libcalls.
10//!
//! The function symbol names have the format "_wasm_function_N", where N is
//! the `FuncIndex`. Defined wasm function symbols refer to a JIT-compiled
//! function body; imported wasm functions do not. The trampoline symbol
//! names have the format "_trampoline_N", where N is the `SignatureIndex`.
15
16use crate::{CompiledFunction, RelocationTarget};
17use anyhow::Result;
18use cranelift_codegen::TextSectionBuilder;
19use cranelift_codegen::isa::unwind::{UnwindInfo, systemv};
20use cranelift_control::ControlPlane;
21use gimli::RunTimeEndian;
22use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer};
23use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection};
24use object::{Architecture, SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope};
25use std::ops::Range;
26use wasmtime_environ::obj;
27use wasmtime_environ::{Compiler, TripleExt, Unsigned};
28
29const TEXT_SECTION_NAME: &[u8] = b".text";
30
31fn text_align(compiler: &dyn Compiler) -> u64 {
32    // text pages will not be made executable with pulley, so the section
33    // doesn't need to be padded out to page alignment boundaries.
34    if compiler.triple().is_pulley() {
35        0x1
36    } else {
37        compiler.page_size_align()
38    }
39}
40
/// A helper structure used to assemble the final text section of an executable,
/// plus unwinding information and other related details.
///
/// This builder relies on Cranelift-specific internals but assembles into a
/// generic `Object` which will get further appended to in a compiler-agnostic
/// fashion later.
pub struct ModuleTextBuilder<'a> {
    /// The target that we're compiling for, used to query target-specific
    /// information as necessary.
    compiler: &'a dyn Compiler,

    /// The object file that we're generating code into.
    obj: &'a mut Object<'static>,

    /// Identifier of the `.text` section within `obj`. Function symbols are
    /// defined relative to this section and the finished code is stored in it.
    text_section: SectionId,

    /// Unwind information collected for each appended function, written out
    /// as separate sections once the text section is finished.
    unwind_info: UnwindInfoBuilder<'a>,

    /// In-progress text section that we're using cranelift's `MachBuffer` to
    /// build to resolve relocations (calls) between functions.
    text: Box<dyn TextSectionBuilder>,

    /// Control plane handed to every `append`/`finish` call made on `text`.
    ctrl_plane: ControlPlane,
}
66
67impl<'a> ModuleTextBuilder<'a> {
68    /// Creates a new builder for the text section of an executable.
69    ///
70    /// The `.text` section will be appended to the specified `obj` along with
71    /// any unwinding or such information as necessary. The `num_funcs`
72    /// parameter indicates the number of times the `append_func` function will
73    /// be called. The `finish` function will panic if this contract is not met.
74    pub fn new(
75        obj: &'a mut Object<'static>,
76        compiler: &'a dyn Compiler,
77        text: Box<dyn TextSectionBuilder>,
78    ) -> Self {
79        // Entire code (functions and trampolines) will be placed
80        // in the ".text" section.
81        let text_section = obj.add_section(
82            obj.segment_name(StandardSegment::Text).to_vec(),
83            TEXT_SECTION_NAME.to_vec(),
84            SectionKind::Text,
85        );
86
87        // If this target is Pulley then flag the text section as not needing the
88        // executable bit in virtual memory which means that the runtime won't
89        // try to call `Mmap::make_executable`, which makes Pulley more
90        // portable.
91        if compiler.triple().is_pulley() {
92            let section = obj.section_mut(text_section);
93            assert!(matches!(section.flags, SectionFlags::None));
94            section.flags = SectionFlags::Elf {
95                sh_flags: obj::SH_WASMTIME_NOT_EXECUTED,
96            };
97        }
98
99        Self {
100            compiler,
101            obj,
102            text_section,
103            unwind_info: Default::default(),
104            text,
105            ctrl_plane: ControlPlane::default(),
106        }
107    }
108
109    /// Appends the `func` specified named `name` to this object.
110    ///
111    /// The `resolve_reloc_target` closure is used to resolve a relocation
112    /// target to an adjacent function which has already been added or will be
113    /// added to this object. The argument is the relocation target specified
114    /// within `CompiledFunction` and the return value must be an index where
115    /// the target will be defined by the `n`th call to `append_func`.
116    ///
117    /// Returns the symbol associated with the function as well as the range
118    /// that the function resides within the text section.
119    pub fn append_func(
120        &mut self,
121        name: &str,
122        compiled_func: &'a CompiledFunction,
123        resolve_reloc_target: impl Fn(wasmtime_environ::RelocationTarget) -> usize,
124    ) -> (SymbolId, Range<u64>) {
125        let body = compiled_func.buffer.data();
126        let alignment = compiled_func.alignment;
127        let body_len = body.len() as u64;
128        let off = self
129            .text
130            .append(true, &body, alignment, &mut self.ctrl_plane);
131
132        let symbol_id = self.obj.add_symbol(Symbol {
133            name: name.as_bytes().to_vec(),
134            value: off,
135            size: body_len,
136            kind: SymbolKind::Text,
137            scope: SymbolScope::Compilation,
138            weak: false,
139            section: SymbolSection::Section(self.text_section),
140            flags: SymbolFlags::None,
141        });
142
143        if let Some(info) = compiled_func.unwind_info() {
144            self.unwind_info.push(off, body_len, info);
145        }
146
147        for r in compiled_func.relocations() {
148            let reloc_offset = off + u64::from(r.offset);
149            match r.reloc_target {
150                // Relocations against user-defined functions means that this is
151                // a relocation against a module-local function, typically a
152                // call between functions. The `text` field is given priority to
153                // resolve this relocation before we actually emit an object
154                // file, but if it can't handle it then we pass through the
155                // relocation.
156                RelocationTarget::Wasm(_) | RelocationTarget::Builtin(_) => {
157                    let target = resolve_reloc_target(r.reloc_target);
158                    if self
159                        .text
160                        .resolve_reloc(reloc_offset, r.reloc, r.addend, target)
161                    {
162                        continue;
163                    }
164
165                    // At this time it's expected that all relocations are
166                    // handled by `text.resolve_reloc`, and anything that isn't
167                    // handled is a bug in `text.resolve_reloc` or something
168                    // transitively there. If truly necessary, though, then this
169                    // loop could also be updated to forward the relocation to
170                    // the final object file as well.
171                    panic!(
172                        "unresolved relocation could not be processed against \
173                         {:?}: {r:?}",
174                        r.reloc_target,
175                    );
176                }
177
178                // This relocation is used to fill in which hostcall id is
179                // desired within the `call_indirect_host` opcode of Pulley
180                // itself. The relocation target is the start of the instruction
181                // and the goal is to insert the static signature number, `n`,
182                // into the instruction.
183                //
184                // At this time the instruction looks like:
185                //
186                //      +------+------+------+------+
187                //      | OP   | OP_EXTENDED |  N   |
188                //      +------+------+------+------+
189                //
190                // This 4-byte encoding has `OP` indicating this is an "extended
191                // opcode" where `OP_EXTENDED` is a 16-bit extended opcode.
192                // The `N` byte is the index of the signature being called and
193                // is what's b eing filled in.
194                //
195                // See the `test_call_indirect_host_width` in
196                // `pulley/tests/all.rs` for this guarantee as well.
197                RelocationTarget::PulleyHostcall(n) => {
198                    #[cfg(feature = "pulley")]
199                    {
200                        use pulley_interpreter::encode::Encode;
201                        assert_eq!(pulley_interpreter::CallIndirectHost::WIDTH, 4);
202                    }
203                    let byte = u8::try_from(n).unwrap();
204                    self.text.write(reloc_offset + 3, &[byte]);
205                }
206            };
207        }
208        (symbol_id, off..off + body_len)
209    }
210
    /// Forces "veneers" to be used for inter-function calls in the text
    /// section which means that in-bounds optimized addresses are never used.
    ///
    /// This is only useful for debugging cranelift itself and typically this
    /// option is disabled.
    ///
    /// The request is simply forwarded to the underlying text section
    /// builder.
    pub fn force_veneers(&mut self) {
        self.text.force_veneers();
    }
219
220    /// Appends the specified amount of bytes of padding into the text section.
221    ///
222    /// This is only useful when fuzzing and/or debugging cranelift itself and
223    /// for production scenarios `padding` is 0 and this function does nothing.
224    pub fn append_padding(&mut self, padding: usize) {
225        if padding == 0 {
226            return;
227        }
228        self.text
229            .append(false, &vec![0; padding], 1, &mut self.ctrl_plane);
230    }
231
232    /// Indicates that the text section has been written completely and this
233    /// will finish appending it to the original object.
234    ///
235    /// Note that this will also write out the unwind information sections if
236    /// necessary.
237    pub fn finish(mut self) {
238        // Finish up the text section now that we're done adding functions.
239        let text = self.text.finish(&mut self.ctrl_plane);
240        self.obj
241            .section_mut(self.text_section)
242            .set_data(text, text_align(self.compiler));
243
244        // Append the unwind information for all our functions, if necessary.
245        self.unwind_info
246            .append_section(self.compiler, self.obj, self.text_section);
247    }
248}
249
/// Builder used to create unwind information for a set of functions added to a
/// text section.
#[derive(Default)]
struct UnwindInfoBuilder<'a> {
    /// Contents of the Windows `.xdata` section accumulated so far. Each
    /// function's entry starts on a 4-byte boundary.
    windows_xdata: Vec<u8>,
    /// One entry per function that has Windows unwind information, serialized
    /// into the `.pdata` section by `write_windows_unwind_info`.
    windows_pdata: Vec<RUNTIME_FUNCTION>,
    /// Pairs of (offset of the function within the text section, System-V
    /// unwind information), turned into `.eh_frame` entries by
    /// `write_systemv_unwind_info`.
    systemv_unwind_info: Vec<(u64, &'a systemv::UnwindInfo)>,
}
258
// This is a mirror of `RUNTIME_FUNCTION` in the Windows API, but defined here
// to ensure everything is always `u32` and to have it available on all
// platforms. Note that all of these specifiers here are relative to a "base
// address" which we define as the base of where the text section is eventually
// loaded.
#[expect(non_camel_case_types, reason = "matching Windows style, not Rust")]
struct RUNTIME_FUNCTION {
    // Offset of the start of the function within the text section.
    begin: u32,
    // Offset just past the end of the function within the text section. Left
    // as 0 for Windows Arm64 entries, for which it is never written out.
    end: u32,
    // Offset of this function's unwind information. While building, this is
    // relative to the start of the `.xdata` contents; the size of the text
    // section is added when `.pdata` is written.
    unwind_address: u32,
}
270
271impl<'a> UnwindInfoBuilder<'a> {
272    /// Pushes the unwind information for a function into this builder.
273    ///
274    /// The function being described must be located at `function_offset` within
275    /// the text section itself, and the function's size is specified by
276    /// `function_len`.
277    ///
278    /// The `info` should come from Cranelift. and is handled here depending on
279    /// its flavor.
280    fn push(&mut self, function_offset: u64, function_len: u64, info: &'a UnwindInfo) {
281        match info {
282            // Windows unwind information is stored in two locations:
283            //
284            // * First is the actual unwinding information which is stored
285            //   in the `.xdata` section. This is where `info`'s emitted
286            //   information will go into.
287            // * Second are pointers to connect all this unwind information,
288            //   stored in the `.pdata` section. The `.pdata` section is an
289            //   array of `RUNTIME_FUNCTION` structures.
290            //
291            // Due to how these will be loaded at runtime the `.pdata` isn't
292            // actually assembled byte-wise here. Instead that's deferred to
293            // happen later during `write_windows_unwind_info` which will apply
294            // a further offset to `unwind_address`.
295            //
296            // FIXME: in theory we could "intern" the `unwind_info` value
297            // here within the `.xdata` section. Most of our unwind
298            // information for functions is probably pretty similar in which
299            // case the `.xdata` could be quite small and `.pdata` could
300            // have multiple functions point to the same unwinding
301            // information.
302            UnwindInfo::WindowsX64(info) => {
303                let unwind_size = info.emit_size();
304                let mut unwind_info = vec![0; unwind_size];
305                info.emit(&mut unwind_info);
306
307                // `.xdata` entries are always 4-byte aligned
308                while self.windows_xdata.len() % 4 != 0 {
309                    self.windows_xdata.push(0x00);
310                }
311                let unwind_address = self.windows_xdata.len();
312                self.windows_xdata.extend_from_slice(&unwind_info);
313
314                // Record a `RUNTIME_FUNCTION` which this will point to.
315                self.windows_pdata.push(RUNTIME_FUNCTION {
316                    begin: u32::try_from(function_offset).unwrap(),
317                    end: u32::try_from(function_offset + function_len).unwrap(),
318                    unwind_address: u32::try_from(unwind_address).unwrap(),
319                });
320            }
321
322            // See https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling
323            UnwindInfo::WindowsArm64(info) => {
324                let code_words = info.code_words();
325                let mut unwind_codes = vec![0; (code_words * 4) as usize];
326                info.emit(&mut unwind_codes);
327
328                // `.xdata` entries are always 4-byte aligned
329                while self.windows_xdata.len() % 4 != 0 {
330                    self.windows_xdata.push(0x00);
331                }
332
333                // First word:
334                // 0-17:    Function Length
335                // 18-19:   Version (must be 0)
336                // 20:      X bit (is exception data present?)
337                // 21:      E bit (has single packed epilogue?)
338                // 22-26:   Epilogue count
339                // 27-31:   Code words count
340                let requires_extended_counts = code_words > (1 << 5);
341                let encoded_function_len = function_len / 4;
342                assert!(encoded_function_len < (1 << 18), "function too large");
343                let mut word1 = u32::try_from(encoded_function_len).unwrap();
344                if !requires_extended_counts {
345                    word1 |= u32::from(code_words) << 27;
346                }
347                let unwind_address = self.windows_xdata.len();
348                self.windows_xdata.extend_from_slice(&word1.to_le_bytes());
349
350                if requires_extended_counts {
351                    // Extended counts word:
352                    // 0-15:    Epilogue count
353                    // 16-23:   Code words count
354                    let extended_counts_word = (code_words as u32) << 16;
355                    self.windows_xdata
356                        .extend_from_slice(&extended_counts_word.to_le_bytes());
357                }
358
359                // Skip epilogue information: Per comment on [`UnwindInst`], we
360                // do not emit information about epilogues.
361
362                // Emit the unwind codes.
363                self.windows_xdata.extend_from_slice(&unwind_codes);
364
365                // Record a `RUNTIME_FUNCTION` which this will point to.
366                // NOTE: `end` is not used, so leave it as 0.
367                self.windows_pdata.push(RUNTIME_FUNCTION {
368                    begin: u32::try_from(function_offset).unwrap(),
369                    end: 0,
370                    unwind_address: u32::try_from(unwind_address).unwrap(),
371                });
372            }
373
374            // System-V is different enough that we just record the unwinding
375            // information to get processed at a later time.
376            UnwindInfo::SystemV(info) => {
377                self.systemv_unwind_info.push((function_offset, info));
378            }
379
380            _ => panic!("some unwind info isn't handled here"),
381        }
382    }
383
384    /// Appends the unwind information section, if any, to the `obj` specified.
385    ///
386    /// This function must be called immediately after the text section was
387    /// added to a builder. The unwind information section must trail the text
388    /// section immediately.
389    ///
390    /// The `text_section`'s section identifier is passed into this function.
391    fn append_section(
392        &self,
393        compiler: &dyn Compiler,
394        obj: &mut Object<'_>,
395        text_section: SectionId,
396    ) {
397        // This write will align the text section to a page boundary and then
398        // return the offset at that point. This gives us the full size of the
399        // text section at that point, after alignment.
400        let text_section_size = obj.append_section_data(text_section, &[], text_align(compiler));
401
402        if self.windows_xdata.len() > 0 {
403            assert!(self.systemv_unwind_info.len() == 0);
404            // The `.xdata` section must come first to be just-after the `.text`
405            // section for the reasons documented in `write_windows_unwind_info`
406            // below.
407            let segment = obj.segment_name(StandardSegment::Data).to_vec();
408            let xdata_id = obj.add_section(segment, b".xdata".to_vec(), SectionKind::ReadOnlyData);
409            let segment = obj.segment_name(StandardSegment::Data).to_vec();
410            let pdata_id = obj.add_section(segment, b".pdata".to_vec(), SectionKind::ReadOnlyData);
411            self.write_windows_unwind_info(obj, xdata_id, pdata_id, text_section_size);
412        }
413
414        if self.systemv_unwind_info.len() > 0 {
415            let segment = obj.segment_name(StandardSegment::Data).to_vec();
416            let section_id =
417                obj.add_section(segment, b".eh_frame".to_vec(), SectionKind::ReadOnlyData);
418            self.write_systemv_unwind_info(compiler, obj, section_id, text_section_size)
419        }
420    }
421
422    /// This function appends a nonstandard section to the object which is only
423    /// used during `CodeMemory::publish`.
424    ///
425    /// This custom section effectively stores a `[RUNTIME_FUNCTION; N]` into
426    /// the object file itself. This way registration of unwind info can simply
427    /// pass this slice to the OS itself and there's no need to recalculate
428    /// anything on the other end of loading a module from a precompiled object.
429    ///
430    /// Support for reading this is in `crates/jit/src/unwind/winx64.rs`.
431    fn write_windows_unwind_info(
432        &self,
433        obj: &mut Object<'_>,
434        xdata_id: SectionId,
435        pdata_id: SectionId,
436        text_section_size: u64,
437    ) {
438        // Append the `.xdata` section, or the actual unwinding information
439        // codes and such which were built as we found unwind information for
440        // functions.
441        obj.append_section_data(xdata_id, &self.windows_xdata, 4);
442
443        // Next append the `.pdata` section, or the array of `RUNTIME_FUNCTION`
444        // structures stored in the binary.
445        //
446        // This memory will be passed at runtime to `RtlAddFunctionTable` which
447        // takes a "base address" and the entries within `RUNTIME_FUNCTION` are
448        // all relative to this base address. The base address we pass is the
449        // address of the text section itself so all the pointers here must be
450        // text-section-relative. The `begin` and `end` fields for the function
451        // it describes are already text-section-relative, but the
452        // `unwind_address` field needs to be updated here since the value
453        // stored right now is `xdata`-section-relative. We know that the
454        // `xdata` section follows the `.text` section so the
455        // `text_section_size` is added in to calculate the final
456        // `.text`-section-relative address of the unwind information.
457        let xdata_rva = |address| {
458            let address = u64::from(address);
459            let address = address + text_section_size;
460            u32::try_from(address).unwrap()
461        };
462        let pdata = match obj.architecture() {
463            Architecture::X86_64 => {
464                let mut pdata = Vec::with_capacity(self.windows_pdata.len() * 3 * 4);
465                for info in self.windows_pdata.iter() {
466                    pdata.extend_from_slice(&info.begin.to_le_bytes());
467                    pdata.extend_from_slice(&info.end.to_le_bytes());
468                    pdata.extend_from_slice(&xdata_rva(info.unwind_address).to_le_bytes());
469                }
470                pdata
471            }
472
473            Architecture::Aarch64 => {
474                // Windows Arm64 .pdata also supports packed unwind data, but
475                // we're not currently using that.
476                let mut pdata = Vec::with_capacity(self.windows_pdata.len() * 2 * 4);
477                for info in self.windows_pdata.iter() {
478                    pdata.extend_from_slice(&info.begin.to_le_bytes());
479                    pdata.extend_from_slice(&xdata_rva(info.unwind_address).to_le_bytes());
480                }
481                pdata
482            }
483
484            _ => unimplemented!("unsupported architecture for windows unwind info"),
485        };
486        obj.append_section_data(pdata_id, &pdata, 4);
487    }
488
489    /// This function appends a nonstandard section to the object which is only
490    /// used during `CodeMemory::publish`.
491    ///
492    /// This will generate a `.eh_frame` section, but not one that can be
493    /// naively loaded. The goal of this section is that we can create the
494    /// section once here and never again does it need to change. To describe
495    /// dynamically loaded functions though each individual FDE needs to talk
496    /// about the function's absolute address that it's referencing. Naturally
497    /// we don't actually know the function's absolute address when we're
498    /// creating an object here.
499    ///
500    /// To solve this problem the FDE address encoding mode is set to
501    /// `DW_EH_PE_pcrel`. This means that the actual effective address that the
502    /// FDE describes is a relative to the address of the FDE itself. By
503    /// leveraging this relative-ness we can assume that the relative distance
504    /// between the FDE and the function it describes is constant, which should
505    /// allow us to generate an FDE ahead-of-time here.
506    ///
507    /// For now this assumes that all the code of functions will start at a
508    /// page-aligned address when loaded into memory. The eh_frame encoded here
509    /// then assumes that the text section is itself page aligned to its size
510    /// and the eh_frame will follow just after the text section. This means
511    /// that the relative offsets we're using here is the FDE going backwards
512    /// into the text section itself.
513    ///
514    /// Note that the library we're using to create the FDEs, `gimli`, doesn't
515    /// actually encode addresses relative to the FDE itself. Instead the
516    /// addresses are encoded relative to the start of the `.eh_frame` section.
517    /// This makes it much easier for us where we provide the relative offset
518    /// from the start of `.eh_frame` to the function in the text section, which
519    /// given our layout basically means the offset of the function in the text
520    /// section from the end of the text section.
521    ///
522    /// A final note is that the reason we page-align the text section's size is
523    /// so the .eh_frame lives on a separate page from the text section itself.
524    /// This allows `.eh_frame` to have different virtual memory permissions,
525    /// such as being purely read-only instead of read/execute like the code
526    /// bits.
527    fn write_systemv_unwind_info(
528        &self,
529        compiler: &dyn Compiler,
530        obj: &mut Object<'_>,
531        section_id: SectionId,
532        text_section_size: u64,
533    ) {
534        let mut cie = match compiler.create_systemv_cie() {
535            Some(cie) => cie,
536            None => return,
537        };
538        let mut table = FrameTable::default();
539        cie.fde_address_encoding = gimli::constants::DW_EH_PE_pcrel;
540        let cie_id = table.add_cie(cie);
541
542        for (text_section_off, unwind_info) in self.systemv_unwind_info.iter() {
543            let backwards_off = text_section_size - text_section_off;
544            let actual_offset = -i64::try_from(backwards_off).unwrap();
545            // Note that gimli wants an unsigned 64-bit integer here, but
546            // unwinders just use this constant for a relative addition with the
547            // address of the FDE, which means that the sign doesn't actually
548            // matter.
549            let fde = unwind_info.to_fde(Address::Constant(actual_offset.unsigned()));
550            table.add_fde(cie_id, fde);
551        }
552        let endian = match compiler.triple().endianness().unwrap() {
553            target_lexicon::Endianness::Little => RunTimeEndian::Little,
554            target_lexicon::Endianness::Big => RunTimeEndian::Big,
555        };
556        let mut eh_frame = EhFrame(MyVec(EndianVec::new(endian)));
557        table.write_eh_frame(&mut eh_frame).unwrap();
558
559        // Some unwinding implementations expect a terminating "empty" length so
560        // a 0 is written at the end of the table for those implementations.
561        let mut endian_vec = (eh_frame.0).0;
562        endian_vec.write_u32(0).unwrap();
563        obj.append_section_data(section_id, endian_vec.slice(), 1);
564
565        use gimli::constants;
566        use gimli::write::Error;
567
568        struct MyVec(EndianVec<RunTimeEndian>);
569
570        impl Writer for MyVec {
571            type Endian = RunTimeEndian;
572
573            fn endian(&self) -> RunTimeEndian {
574                self.0.endian()
575            }
576
577            fn len(&self) -> usize {
578                self.0.len()
579            }
580
581            fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
582                self.0.write(buf)
583            }
584
585            fn write_at(&mut self, pos: usize, buf: &[u8]) -> Result<(), Error> {
586                self.0.write_at(pos, buf)
587            }
588
589            // FIXME(gimli-rs/gimli#576) this is the definition we want for
590            // `write_eh_pointer` but the default implementation, at the time
591            // of this writing, uses `offset - val` instead of `val - offset`.
592            // A PR has been merged to fix this but until that's published we
593            // can't use it.
594            fn write_eh_pointer(
595                &mut self,
596                address: Address,
597                eh_pe: constants::DwEhPe,
598                size: u8,
599            ) -> Result<(), Error> {
600                let val = match address {
601                    Address::Constant(val) => val,
602                    Address::Symbol { .. } => unreachable!(),
603                };
604                assert_eq!(eh_pe.application(), constants::DW_EH_PE_pcrel);
605                let offset = self.len() as u64;
606                let val = val.wrapping_sub(offset);
607                self.write_eh_pointer_data(val, eh_pe.format(), size)
608            }
609        }
610    }
611}