wasmtime_cranelift_shared/
obj.rs

1//! Object file builder.
2//!
3//! Creates ELF image based on `Compilation` information. The ELF contains
4//! functions and trampolines in the ".text" section. It also contains all
5//! relocation records for the linking stage. If DWARF sections exist, their
6//! content will be written as well.
7//!
8//! The object file has symbols for each function and trampoline, as well as
9//! symbols that refer to libcalls.
10//!
//! The function symbol names have the format "_wasm_function_N", where N is
//! the `FuncIndex`. Symbols of defined wasm functions refer to a JIT-compiled
//! function body; those of imported wasm functions do not. The trampoline
//! symbol names have the format "_trampoline_N", where N is the
//! `SignatureIndex`.
15
16use crate::{CompiledFuncEnv, CompiledFunction, RelocationTarget};
17use anyhow::Result;
18use cranelift_codegen::binemit::Reloc;
19use cranelift_codegen::ir::LibCall;
20use cranelift_codegen::isa::unwind::{systemv, UnwindInfo};
21use cranelift_codegen::TextSectionBuilder;
22use cranelift_control::ControlPlane;
23use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer};
24use gimli::RunTimeEndian;
25use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection};
26use object::{Architecture, SectionKind, SymbolFlags, SymbolKind, SymbolScope};
27use std::collections::HashMap;
28use std::ops::Range;
29use wasmtime_environ::{Compiler, FuncIndex};
30
/// Name of the object section that receives all compiled code (functions and
/// trampolines).
const TEXT_SECTION_NAME: &[u8] = b".text";
32
/// A helper structure used to assemble the final text section of an executable,
/// plus unwinding information and other related details.
///
/// This builder relies on Cranelift-specific internals but assembles into a
/// generic `Object` which will get further appended to in a compiler-agnostic
/// fashion later.
pub struct ModuleTextBuilder<'a> {
    /// The target that we're compiling for, used to query target-specific
    /// information as necessary.
    compiler: &'a dyn Compiler,

    /// The object file that we're generating code into.
    obj: &'a mut Object<'static>,

    /// Identifier of the `.text` section within `obj` which receives the
    /// assembled code and which function symbols are defined against.
    text_section: SectionId,

    /// Unwind information gathered for each appended function; written out as
    /// separate section(s) when the builder is finished.
    unwind_info: UnwindInfoBuilder<'a>,

    /// In-progress text section that we're using cranelift's `MachBuffer` to
    /// build to resolve relocations (calls) between functions.
    text: Box<dyn TextSectionBuilder>,

    /// Symbols defined in the object for libcalls that relocations are applied
    /// against.
    ///
    /// Note that this isn't typically used. It's only used for SSE-disabled
    /// builds without SIMD on x86_64 right now.
    libcall_symbols: HashMap<LibCall, SymbolId>,

    /// Control plane passed along to every `text.append` and `text.finish`
    /// call; created with `ControlPlane::default()`.
    ctrl_plane: ControlPlane,
}
65
66impl<'a> ModuleTextBuilder<'a> {
67    /// Creates a new builder for the text section of an executable.
68    ///
69    /// The `.text` section will be appended to the specified `obj` along with
70    /// any unwinding or such information as necessary. The `num_funcs`
71    /// parameter indicates the number of times the `append_func` function will
72    /// be called. The `finish` function will panic if this contract is not met.
73    pub fn new(
74        obj: &'a mut Object<'static>,
75        compiler: &'a dyn Compiler,
76        text: Box<dyn TextSectionBuilder>,
77    ) -> Self {
78        // Entire code (functions and trampolines) will be placed
79        // in the ".text" section.
80        let text_section = obj.add_section(
81            obj.segment_name(StandardSegment::Text).to_vec(),
82            TEXT_SECTION_NAME.to_vec(),
83            SectionKind::Text,
84        );
85
86        Self {
87            compiler,
88            obj,
89            text_section,
90            unwind_info: Default::default(),
91            text,
92            libcall_symbols: HashMap::default(),
93            ctrl_plane: ControlPlane::default(),
94        }
95    }
96
97    /// Appends the `func` specified named `name` to this object.
98    ///
99    /// The `resolve_reloc_target` closure is used to resolve a relocation
100    /// target to an adjacent function which has already been added or will be
101    /// added to this object. The argument is the relocation target specified
102    /// within `CompiledFunction` and the return value must be an index where
103    /// the target will be defined by the `n`th call to `append_func`.
104    ///
105    /// Returns the symbol associated with the function as well as the range
106    /// that the function resides within the text section.
107    pub fn append_func(
108        &mut self,
109        name: &str,
110        compiled_func: &'a CompiledFunction<impl CompiledFuncEnv>,
111        resolve_reloc_target: impl Fn(FuncIndex) -> usize,
112    ) -> (SymbolId, Range<u64>) {
113        let body = compiled_func.buffer.data();
114        let alignment = compiled_func.alignment;
115        let body_len = body.len() as u64;
116        let off = self
117            .text
118            .append(true, &body, alignment, &mut self.ctrl_plane);
119
120        let symbol_id = self.obj.add_symbol(Symbol {
121            name: name.as_bytes().to_vec(),
122            value: off,
123            size: body_len,
124            kind: SymbolKind::Text,
125            scope: SymbolScope::Compilation,
126            weak: false,
127            section: SymbolSection::Section(self.text_section),
128            flags: SymbolFlags::None,
129        });
130
131        if let Some(info) = compiled_func.unwind_info() {
132            self.unwind_info.push(off, body_len, info);
133        }
134
135        for r in compiled_func.relocations() {
136            match r.reloc_target {
137                // Relocations against user-defined functions means that this is
138                // a relocation against a module-local function, typically a
139                // call between functions. The `text` field is given priority to
140                // resolve this relocation before we actually emit an object
141                // file, but if it can't handle it then we pass through the
142                // relocation.
143                RelocationTarget::UserFunc(index) => {
144                    let target = resolve_reloc_target(index);
145                    if self
146                        .text
147                        .resolve_reloc(off + u64::from(r.offset), r.reloc, r.addend, target)
148                    {
149                        continue;
150                    }
151
152                    // At this time it's expected that all relocations are
153                    // handled by `text.resolve_reloc`, and anything that isn't
154                    // handled is a bug in `text.resolve_reloc` or something
155                    // transitively there. If truly necessary, though, then this
156                    // loop could also be updated to forward the relocation to
157                    // the final object file as well.
158                    panic!(
159                        "unresolved relocation could not be processed against \
160                         {index:?}: {r:?}"
161                    );
162                }
163
164                // Relocations against libcalls are not common at this time and
165                // are only used in non-default configurations that disable wasm
166                // SIMD, disable SSE features, and for wasm modules that still
167                // use floating point operations.
168                //
169                // Currently these relocations are all expected to be absolute
170                // 8-byte relocations so that's asserted here and then encoded
171                // directly into the object as a normal object relocation. This
172                // is processed at module load time to resolve the relocations.
173                RelocationTarget::LibCall(call) => {
174                    let symbol = *self.libcall_symbols.entry(call).or_insert_with(|| {
175                        self.obj.add_symbol(Symbol {
176                            name: libcall_name(call).as_bytes().to_vec(),
177                            value: 0,
178                            size: 0,
179                            kind: SymbolKind::Text,
180                            scope: SymbolScope::Linkage,
181                            weak: false,
182                            section: SymbolSection::Undefined,
183                            flags: SymbolFlags::None,
184                        })
185                    });
186                    let (encoding, kind, size) = match r.reloc {
187                        Reloc::Abs8 => (
188                            object::RelocationEncoding::Generic,
189                            object::RelocationKind::Absolute,
190                            8,
191                        ),
192                        other => unimplemented!("unimplemented relocation kind {other:?}"),
193                    };
194                    self.obj
195                        .add_relocation(
196                            self.text_section,
197                            object::write::Relocation {
198                                symbol,
199                                size,
200                                kind,
201                                encoding,
202                                offset: off + u64::from(r.offset),
203                                addend: r.addend,
204                            },
205                        )
206                        .unwrap();
207                }
208            };
209        }
210        (symbol_id, off..off + body_len)
211    }
212
    /// Forces "veneers" to be used for inter-function calls in the text
    /// section which means that in-bounds optimized addresses are never used.
    ///
    /// This is only useful for debugging cranelift itself and typically this
    /// option is disabled. The request is simply forwarded to the underlying
    /// text section builder.
    pub fn force_veneers(&mut self) {
        self.text.force_veneers();
    }
221
222    /// Appends the specified amount of bytes of padding into the text section.
223    ///
224    /// This is only useful when fuzzing and/or debugging cranelift itself and
225    /// for production scenarios `padding` is 0 and this function does nothing.
226    pub fn append_padding(&mut self, padding: usize) {
227        if padding == 0 {
228            return;
229        }
230        self.text
231            .append(false, &vec![0; padding], 1, &mut self.ctrl_plane);
232    }
233
234    /// Indicates that the text section has been written completely and this
235    /// will finish appending it to the original object.
236    ///
237    /// Note that this will also write out the unwind information sections if
238    /// necessary.
239    pub fn finish(mut self) {
240        // Finish up the text section now that we're done adding functions.
241        let text = self.text.finish(&mut self.ctrl_plane);
242        self.obj
243            .section_mut(self.text_section)
244            .set_data(text, self.compiler.page_size_align());
245
246        // Append the unwind information for all our functions, if necessary.
247        self.unwind_info
248            .append_section(self.compiler, self.obj, self.text_section);
249    }
250}
251
/// Builder used to create unwind information for a set of functions added to a
/// text section.
#[derive(Default)]
struct UnwindInfoBuilder<'a> {
    /// Bytes destined for the Windows `.xdata` section: the emitted unwind
    /// information of each function, with every entry starting at a 4-byte
    /// aligned offset.
    windows_xdata: Vec<u8>,
    /// One `RUNTIME_FUNCTION` per function that has Windows unwind
    /// information; serialized into the `.pdata` section.
    windows_pdata: Vec<RUNTIME_FUNCTION>,
    /// System-V unwind information, each paired with the text-section offset
    /// of the function it describes.
    systemv_unwind_info: Vec<(u64, &'a systemv::UnwindInfo)>,
}
260
// This is a mirror of `RUNTIME_FUNCTION` in the Windows API, but defined here
// to ensure everything is always `u32` and to have it available on all
// platforms. Note that all of these specifiers here are relative to a "base
// address" which we define as the base of where the text section is eventually
// loaded.
#[allow(non_camel_case_types)]
struct RUNTIME_FUNCTION {
    // Offset of the start of the function within the text section.
    begin: u32,
    // Offset just past the end of the function within the text section.
    end: u32,
    // Offset of the function's unwind information. While stored in the builder
    // this is relative to the start of `.xdata`; it is rebased to be
    // text-section-relative when `.pdata` is serialized.
    unwind_address: u32,
}
272
273impl<'a> UnwindInfoBuilder<'a> {
274    /// Pushes the unwind information for a function into this builder.
275    ///
276    /// The function being described must be located at `function_offset` within
277    /// the text section itself, and the function's size is specified by
278    /// `function_len`.
279    ///
280    /// The `info` should come from Cranelift. and is handled here depending on
281    /// its flavor.
282    fn push(&mut self, function_offset: u64, function_len: u64, info: &'a UnwindInfo) {
283        match info {
284            // Windows unwind information is stored in two locations:
285            //
286            // * First is the actual unwinding information which is stored
287            //   in the `.xdata` section. This is where `info`'s emitted
288            //   information will go into.
289            // * Second are pointers to connect all this unwind information,
290            //   stored in the `.pdata` section. The `.pdata` section is an
291            //   array of `RUNTIME_FUNCTION` structures.
292            //
293            // Due to how these will be loaded at runtime the `.pdata` isn't
294            // actually assembled byte-wise here. Instead that's deferred to
295            // happen later during `write_windows_unwind_info` which will apply
296            // a further offset to `unwind_address`.
297            UnwindInfo::WindowsX64(info) => {
298                let unwind_size = info.emit_size();
299                let mut unwind_info = vec![0; unwind_size];
300                info.emit(&mut unwind_info);
301
302                // `.xdata` entries are always 4-byte aligned
303                //
304                // FIXME: in theory we could "intern" the `unwind_info` value
305                // here within the `.xdata` section. Most of our unwind
306                // information for functions is probably pretty similar in which
307                // case the `.xdata` could be quite small and `.pdata` could
308                // have multiple functions point to the same unwinding
309                // information.
310                while self.windows_xdata.len() % 4 != 0 {
311                    self.windows_xdata.push(0x00);
312                }
313                let unwind_address = self.windows_xdata.len();
314                self.windows_xdata.extend_from_slice(&unwind_info);
315
316                // Record a `RUNTIME_FUNCTION` which this will point to.
317                self.windows_pdata.push(RUNTIME_FUNCTION {
318                    begin: u32::try_from(function_offset).unwrap(),
319                    end: u32::try_from(function_offset + function_len).unwrap(),
320                    unwind_address: u32::try_from(unwind_address).unwrap(),
321                });
322            }
323
324            // System-V is different enough that we just record the unwinding
325            // information to get processed at a later time.
326            UnwindInfo::SystemV(info) => {
327                self.systemv_unwind_info.push((function_offset, info));
328            }
329
330            _ => panic!("some unwind info isn't handled here"),
331        }
332    }
333
334    /// Appends the unwind information section, if any, to the `obj` specified.
335    ///
336    /// This function must be called immediately after the text section was
337    /// added to a builder. The unwind information section must trail the text
338    /// section immediately.
339    ///
340    /// The `text_section`'s section identifier is passed into this function.
341    fn append_section(
342        &self,
343        compiler: &dyn Compiler,
344        obj: &mut Object<'_>,
345        text_section: SectionId,
346    ) {
347        // This write will align the text section to a page boundary and then
348        // return the offset at that point. This gives us the full size of the
349        // text section at that point, after alignment.
350        let text_section_size =
351            obj.append_section_data(text_section, &[], compiler.page_size_align());
352
353        if self.windows_xdata.len() > 0 {
354            assert!(self.systemv_unwind_info.len() == 0);
355            // The `.xdata` section must come first to be just-after the `.text`
356            // section for the reasons documented in `write_windows_unwind_info`
357            // below.
358            let segment = obj.segment_name(StandardSegment::Data).to_vec();
359            let xdata_id = obj.add_section(segment, b".xdata".to_vec(), SectionKind::ReadOnlyData);
360            let segment = obj.segment_name(StandardSegment::Data).to_vec();
361            let pdata_id = obj.add_section(segment, b".pdata".to_vec(), SectionKind::ReadOnlyData);
362            self.write_windows_unwind_info(obj, xdata_id, pdata_id, text_section_size);
363        }
364
365        if self.systemv_unwind_info.len() > 0 {
366            let segment = obj.segment_name(StandardSegment::Data).to_vec();
367            let section_id =
368                obj.add_section(segment, b".eh_frame".to_vec(), SectionKind::ReadOnlyData);
369            self.write_systemv_unwind_info(compiler, obj, section_id, text_section_size)
370        }
371    }
372
373    /// This function appends a nonstandard section to the object which is only
374    /// used during `CodeMemory::publish`.
375    ///
376    /// This custom section effectively stores a `[RUNTIME_FUNCTION; N]` into
377    /// the object file itself. This way registration of unwind info can simply
378    /// pass this slice to the OS itself and there's no need to recalculate
379    /// anything on the other end of loading a module from a precompiled object.
380    ///
381    /// Support for reading this is in `crates/jit/src/unwind/winx64.rs`.
382    fn write_windows_unwind_info(
383        &self,
384        obj: &mut Object<'_>,
385        xdata_id: SectionId,
386        pdata_id: SectionId,
387        text_section_size: u64,
388    ) {
389        // Currently the binary format supported here only supports
390        // little-endian for x86_64, or at least that's all where it's tested.
391        // This may need updates for other platforms.
392        assert_eq!(obj.architecture(), Architecture::X86_64);
393
394        // Append the `.xdata` section, or the actual unwinding information
395        // codes and such which were built as we found unwind information for
396        // functions.
397        obj.append_section_data(xdata_id, &self.windows_xdata, 4);
398
399        // Next append the `.pdata` section, or the array of `RUNTIME_FUNCTION`
400        // structures stored in the binary.
401        //
402        // This memory will be passed at runtime to `RtlAddFunctionTable` which
403        // takes a "base address" and the entries within `RUNTIME_FUNCTION` are
404        // all relative to this base address. The base address we pass is the
405        // address of the text section itself so all the pointers here must be
406        // text-section-relative. The `begin` and `end` fields for the function
407        // it describes are already text-section-relative, but the
408        // `unwind_address` field needs to be updated here since the value
409        // stored right now is `xdata`-section-relative. We know that the
410        // `xdata` section follows the `.text` section so the
411        // `text_section_size` is added in to calculate the final
412        // `.text`-section-relative address of the unwind information.
413        let mut pdata = Vec::with_capacity(self.windows_pdata.len() * 3 * 4);
414        for info in self.windows_pdata.iter() {
415            pdata.extend_from_slice(&info.begin.to_le_bytes());
416            pdata.extend_from_slice(&info.end.to_le_bytes());
417            let address = text_section_size + u64::from(info.unwind_address);
418            let address = u32::try_from(address).unwrap();
419            pdata.extend_from_slice(&address.to_le_bytes());
420        }
421        obj.append_section_data(pdata_id, &pdata, 4);
422    }
423
424    /// This function appends a nonstandard section to the object which is only
425    /// used during `CodeMemory::publish`.
426    ///
427    /// This will generate a `.eh_frame` section, but not one that can be
428    /// naively loaded. The goal of this section is that we can create the
429    /// section once here and never again does it need to change. To describe
430    /// dynamically loaded functions though each individual FDE needs to talk
431    /// about the function's absolute address that it's referencing. Naturally
432    /// we don't actually know the function's absolute address when we're
433    /// creating an object here.
434    ///
435    /// To solve this problem the FDE address encoding mode is set to
436    /// `DW_EH_PE_pcrel`. This means that the actual effective address that the
437    /// FDE describes is a relative to the address of the FDE itself. By
438    /// leveraging this relative-ness we can assume that the relative distance
439    /// between the FDE and the function it describes is constant, which should
440    /// allow us to generate an FDE ahead-of-time here.
441    ///
442    /// For now this assumes that all the code of functions will start at a
443    /// page-aligned address when loaded into memory. The eh_frame encoded here
444    /// then assumes that the text section is itself page aligned to its size
445    /// and the eh_frame will follow just after the text section. This means
446    /// that the relative offsets we're using here is the FDE going backwards
447    /// into the text section itself.
448    ///
449    /// Note that the library we're using to create the FDEs, `gimli`, doesn't
450    /// actually encode addresses relative to the FDE itself. Instead the
451    /// addresses are encoded relative to the start of the `.eh_frame` section.
452    /// This makes it much easier for us where we provide the relative offset
453    /// from the start of `.eh_frame` to the function in the text section, which
454    /// given our layout basically means the offset of the function in the text
455    /// section from the end of the text section.
456    ///
457    /// A final note is that the reason we page-align the text section's size is
458    /// so the .eh_frame lives on a separate page from the text section itself.
459    /// This allows `.eh_frame` to have different virtual memory permissions,
460    /// such as being purely read-only instead of read/execute like the code
461    /// bits.
462    fn write_systemv_unwind_info(
463        &self,
464        compiler: &dyn Compiler,
465        obj: &mut Object<'_>,
466        section_id: SectionId,
467        text_section_size: u64,
468    ) {
469        let mut cie = compiler
470            .create_systemv_cie()
471            .expect("must be able to create a CIE for system-v unwind info");
472        let mut table = FrameTable::default();
473        cie.fde_address_encoding = gimli::constants::DW_EH_PE_pcrel;
474        let cie_id = table.add_cie(cie);
475
476        for (text_section_off, unwind_info) in self.systemv_unwind_info.iter() {
477            let backwards_off = text_section_size - text_section_off;
478            let actual_offset = -i64::try_from(backwards_off).unwrap();
479            // Note that gimli wants an unsigned 64-bit integer here, but
480            // unwinders just use this constant for a relative addition with the
481            // address of the FDE, which means that the sign doesn't actually
482            // matter.
483            let fde = unwind_info.to_fde(Address::Constant(actual_offset as u64));
484            table.add_fde(cie_id, fde);
485        }
486        let endian = match compiler.triple().endianness().unwrap() {
487            target_lexicon::Endianness::Little => RunTimeEndian::Little,
488            target_lexicon::Endianness::Big => RunTimeEndian::Big,
489        };
490        let mut eh_frame = EhFrame(MyVec(EndianVec::new(endian)));
491        table.write_eh_frame(&mut eh_frame).unwrap();
492
493        // Some unwinding implementations expect a terminating "empty" length so
494        // a 0 is written at the end of the table for those implementations.
495        let mut endian_vec = (eh_frame.0).0;
496        endian_vec.write_u32(0).unwrap();
497        obj.append_section_data(section_id, endian_vec.slice(), 1);
498
499        use gimli::constants;
500        use gimli::write::Error;
501
502        struct MyVec(EndianVec<RunTimeEndian>);
503
504        impl Writer for MyVec {
505            type Endian = RunTimeEndian;
506
507            fn endian(&self) -> RunTimeEndian {
508                self.0.endian()
509            }
510
511            fn len(&self) -> usize {
512                self.0.len()
513            }
514
515            fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
516                self.0.write(buf)
517            }
518
519            fn write_at(&mut self, pos: usize, buf: &[u8]) -> Result<(), Error> {
520                self.0.write_at(pos, buf)
521            }
522
523            // FIXME(gimli-rs/gimli#576) this is the definition we want for
524            // `write_eh_pointer` but the default implementation, at the time
525            // of this writing, uses `offset - val` instead of `val - offset`.
526            // A PR has been merged to fix this but until that's published we
527            // can't use it.
528            fn write_eh_pointer(
529                &mut self,
530                address: Address,
531                eh_pe: constants::DwEhPe,
532                size: u8,
533            ) -> Result<(), Error> {
534                let val = match address {
535                    Address::Constant(val) => val,
536                    Address::Symbol { .. } => unreachable!(),
537                };
538                assert_eq!(eh_pe.application(), constants::DW_EH_PE_pcrel);
539                let offset = self.len() as u64;
540                let val = val.wrapping_sub(offset);
541                self.write_eh_pointer_data(val, eh_pe.format(), size)
542            }
543        }
544    }
545}
546
547fn libcall_name(call: LibCall) -> &'static str {
548    use wasmtime_environ::obj::LibCall as LC;
549    let other = match call {
550        LibCall::FloorF32 => LC::FloorF32,
551        LibCall::FloorF64 => LC::FloorF64,
552        LibCall::NearestF32 => LC::NearestF32,
553        LibCall::NearestF64 => LC::NearestF64,
554        LibCall::CeilF32 => LC::CeilF32,
555        LibCall::CeilF64 => LC::CeilF64,
556        LibCall::TruncF32 => LC::TruncF32,
557        LibCall::TruncF64 => LC::TruncF64,
558        LibCall::FmaF32 => LC::FmaF32,
559        LibCall::FmaF64 => LC::FmaF64,
560        LibCall::X86Pshufb => LC::X86Pshufb,
561        _ => panic!("unknown libcall to give a name to: {call:?}"),
562    };
563    other.symbol()
564}