wasmer_runtime_core_near/
codegen.rs

1//! The codegen module provides common functions and data structures used by multiple backends
2//! during the code generation process.
3#[cfg(unix)]
4use crate::fault::FaultInfo;
5use crate::{
6    backend::RunnableModule,
7    backend::{CacheGen, Compiler, CompilerConfig, Features, Token},
8    cache::{Artifact, Error as CacheError},
9    error::{CompileError, CompileResult, RuntimeError},
10    module::{ModuleInfo, ModuleInner},
11    structures::Map,
12    types::{FuncIndex, FuncSig, SigIndex},
13};
14use smallvec::SmallVec;
15use std::collections::HashMap;
16use std::fmt;
17use std::fmt::Debug;
18use std::marker::PhantomData;
19use std::sync::{Arc, RwLock};
20use wasmparser::{self, WasmDecoder};
21use wasmparser::{Operator, Type as WpType};
22
23/// A type that defines a function pointer, which is called when breakpoints occur.
24pub type BreakpointHandler =
25    Box<dyn Fn(BreakpointInfo) -> Result<(), RuntimeError> + Send + Sync + 'static>;
26
27/// Maps instruction pointers to their breakpoint handlers.
28pub type BreakpointMap = Arc<HashMap<usize, BreakpointHandler>>;
29
30/// An event generated during parsing of a wasm binary
31#[derive(Debug)]
32pub enum Event<'a, 'b> {
33    /// An internal event created by the parser used to provide hooks during code generation.
34    Internal(InternalEvent),
35    /// An event generated by parsing a wasm operator
36    Wasm(&'b Operator<'a>),
37    /// An event generated by parsing a wasm operator that contains an owned `Operator`
38    WasmOwned(Operator<'a>),
39}
40
41/// Kinds of `InternalEvent`s created during parsing.
42pub enum InternalEvent {
43    /// A function parse is about to begin.
44    FunctionBegin(u32),
45    /// A function parsing has just completed.
46    FunctionEnd,
47    /// A breakpoint emitted during parsing.
48    Breakpoint(BreakpointHandler),
49    /// Indicates setting an internal field.
50    SetInternal(u32),
51    /// Indicates getting an internal field.
52    GetInternal(u32),
53}
54
55impl fmt::Debug for InternalEvent {
56    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57        match self {
58            InternalEvent::FunctionBegin(_) => write!(f, "FunctionBegin"),
59            InternalEvent::FunctionEnd => write!(f, "FunctionEnd"),
60            InternalEvent::Breakpoint(_) => write!(f, "Breakpoint"),
61            InternalEvent::SetInternal(_) => write!(f, "SetInternal"),
62            InternalEvent::GetInternal(_) => write!(f, "GetInternal"),
63        }
64    }
65}
66
/// A byte range inside the Wasm code section, expressed as offsets from the
/// start of that section.
///
/// Invariant: `start <= end`.
#[derive(Debug, Clone, Copy)]
pub struct WasmSpan {
    /// Offset, in bytes from the start of the Wasm code section, where the span begins
    start: u32,
    /// Offset, in bytes from the start of the Wasm code section, where the span ends
    end: u32,
}

impl WasmSpan {
    /// Builds a `WasmSpan` covering `start..end`.
    ///
    /// `start` must not be greater than `end`; this is checked only when debug
    /// assertions are enabled.
    // TODO: mark this function as `const` when asserts get stabilized as `const`
    // see: https://github.com/rust-lang/rust/issues/57563
    pub fn new(start: u32, end: u32) -> Self {
        debug_assert!(start <= end);
        WasmSpan { start, end }
    }

    /// Offset in bytes, from the start of the Wasm code section, at which the span begins
    pub const fn start(&self) -> u32 {
        self.start
    }

    /// Offset in bytes, from the start of the Wasm code section, at which the span ends
    pub const fn end(&self) -> u32 {
        self.end
    }

    /// Length of the span in bytes (`end - start`)
    pub const fn size(&self) -> u32 {
        self.end() - self.start()
    }
}
105
106/// Information for a breakpoint
107#[cfg(unix)]
108pub struct BreakpointInfo<'a> {
109    /// Fault.
110    pub fault: Option<&'a FaultInfo>,
111}
112
113/// Information for a breakpoint
114#[cfg(not(unix))]
115pub struct BreakpointInfo {
116    /// Fault placeholder.
117    pub fault: Option<()>,
118}
119
120/// A trait that represents the functions needed to be implemented to generate code for a module.
121pub trait ModuleCodeGenerator<FCG: FunctionCodeGenerator<E>, RM: RunnableModule, E: Debug> {
122    /// Creates a new module code generator.
123    fn new() -> Self;
124
125    /// Creates a new module code generator for specified target.
126    fn new_with_target(
127        triple: Option<String>,
128        cpu_name: Option<String>,
129        cpu_features: Option<String>,
130    ) -> Self;
131
132    /// Returns the backend id associated with this MCG.
133    fn backend_id() -> &'static str;
134
135    /// It sets if the current compiler requires validation before compilation
136    fn requires_pre_validation() -> bool {
137        true
138    }
139
140    /// Feeds the compiler config.
141    fn feed_compiler_config(&mut self, _config: &CompilerConfig) -> Result<(), E> {
142        Ok(())
143    }
144    /// Adds an import function.
145    fn feed_import_function(&mut self, _sigindex: SigIndex) -> Result<(), E>;
146    /// Sets the signatures.
147    fn feed_signatures(&mut self, signatures: Map<SigIndex, FuncSig>) -> Result<(), E>;
148    /// Sets function signatures.
149    fn feed_function_signatures(&mut self, assoc: Map<FuncIndex, SigIndex>) -> Result<(), E>;
150    /// Checks the precondition for a module.
151    fn check_precondition(&mut self, module_info: &ModuleInfo) -> Result<(), E>;
152    /// Creates a new function and returns the function-scope code generator for it.
153    fn next_function(
154        &mut self,
155        module_info: Arc<RwLock<ModuleInfo>>,
156        loc: WasmSpan,
157    ) -> Result<&mut FCG, E>;
158    /// Finalizes this module.
159    fn finalize(
160        self,
161        module_info: &ModuleInfo,
162    ) -> Result<(RM, Option<DebugMetadata>, Box<dyn CacheGen>), E>;
163
164    /// Creates a module from cache.
165    unsafe fn from_cache(cache: Artifact, _: Token) -> Result<ModuleInner, CacheError>;
166}
167
/// Stand-in for `wasm_debug`'s `CompiledFunctionData`, used when the
/// `generate-debug-information` feature is disabled.
#[cfg(not(feature = "generate-debug-information"))]
type CompiledFunctionData = ();

/// Stand-in for `wasm_debug`'s `ValueLabelsRangesInner`, used when the
/// `generate-debug-information` feature is disabled.
#[cfg(not(feature = "generate-debug-information"))]
type ValueLabelsRangesInner = ();
175
176#[cfg(feature = "generate-debug-information")]
177use wasm_debug::types::{CompiledFunctionData, ValueLabelsRangesInner};
178
179#[derive(Clone, Debug)]
180/// Useful information for debugging gathered by compiling a Wasm module.
181pub struct DebugMetadata {
182    /// [`CompiledFunctionData`] in [`FuncIndex`] order
183    pub func_info: Map<FuncIndex, CompiledFunctionData>,
184    /// [`ValueLabelsRangesInner`] in [`FuncIndex`] order
185    pub inst_info: Map<FuncIndex, ValueLabelsRangesInner>,
186    /// Stack slot offsets in [`FuncIndex`] order
187    pub stack_slot_offsets: Map<FuncIndex, Vec<Option<i32>>>,
188    /// function pointers and their lengths
189    pub pointers: Vec<(*const u8, usize)>,
190}
191
192/// A streaming compiler which is designed to generated code for a module based on a stream
193/// of wasm parser events.
194pub struct StreamingCompiler<
195    MCG: ModuleCodeGenerator<FCG, RM, E>,
196    FCG: FunctionCodeGenerator<E>,
197    RM: RunnableModule + 'static,
198    E: Debug,
199    CGEN: Fn() -> MiddlewareChain,
200> {
201    middleware_chain_generator: CGEN,
202    _phantom_mcg: PhantomData<MCG>,
203    _phantom_fcg: PhantomData<FCG>,
204    _phantom_rm: PhantomData<RM>,
205    _phantom_e: PhantomData<E>,
206}
207
208/// A simple generator for a `StreamingCompiler`.
209pub struct SimpleStreamingCompilerGen<
210    MCG: ModuleCodeGenerator<FCG, RM, E>,
211    FCG: FunctionCodeGenerator<E>,
212    RM: RunnableModule + 'static,
213    E: Debug,
214> {
215    _phantom_mcg: PhantomData<MCG>,
216    _phantom_fcg: PhantomData<FCG>,
217    _phantom_rm: PhantomData<RM>,
218    _phantom_e: PhantomData<E>,
219}
220
221impl<
222        MCG: ModuleCodeGenerator<FCG, RM, E>,
223        FCG: FunctionCodeGenerator<E>,
224        RM: RunnableModule + 'static,
225        E: Debug,
226    > SimpleStreamingCompilerGen<MCG, FCG, RM, E>
227{
228    /// Create a new `StreamingCompiler`.
229    pub fn new() -> StreamingCompiler<MCG, FCG, RM, E, impl Fn() -> MiddlewareChain> {
230        StreamingCompiler::new(|| MiddlewareChain::new())
231    }
232}
233
234impl<
235        MCG: ModuleCodeGenerator<FCG, RM, E>,
236        FCG: FunctionCodeGenerator<E>,
237        RM: RunnableModule + 'static,
238        E: Debug,
239        CGEN: Fn() -> MiddlewareChain,
240    > StreamingCompiler<MCG, FCG, RM, E, CGEN>
241{
242    /// Create a new `StreamingCompiler` with the given `MiddlewareChain`.
243    pub fn new(chain_gen: CGEN) -> Self {
244        Self {
245            middleware_chain_generator: chain_gen,
246            _phantom_mcg: PhantomData,
247            _phantom_fcg: PhantomData,
248            _phantom_rm: PhantomData,
249            _phantom_e: PhantomData,
250        }
251    }
252}
253
254/// Create a new `ValidatingParserConfig` with the given features.
255pub fn validating_parser_config(features: &Features) -> wasmparser::ValidatingParserConfig {
256    wasmparser::ValidatingParserConfig {
257        operator_config: wasmparser::OperatorValidatorConfig {
258            enable_threads: features.threads,
259            enable_reference_types: false,
260            enable_simd: features.simd,
261            enable_bulk_memory: false,
262            enable_multi_value: false,
263
264            #[cfg(feature = "deterministic-execution")]
265            deterministic_only: true,
266        },
267    }
268}
269
270fn validate_with_features(bytes: &[u8], features: &Features) -> CompileResult<()> {
271    let mut parser =
272        wasmparser::ValidatingParser::new(bytes, Some(validating_parser_config(features)));
273    loop {
274        let state = parser.read();
275        match *state {
276            wasmparser::ParserState::EndWasm => break Ok(()),
277            wasmparser::ParserState::Error(ref err) => Err(CompileError::ValidationError {
278                msg: err.message().to_string(),
279            })?,
280            _ => {}
281        }
282    }
283}
284
285impl<
286        MCG: ModuleCodeGenerator<FCG, RM, E>,
287        FCG: FunctionCodeGenerator<E>,
288        RM: RunnableModule + 'static,
289        E: Debug,
290        CGEN: Fn() -> MiddlewareChain,
291    > Compiler for StreamingCompiler<MCG, FCG, RM, E, CGEN>
292{
293    #[allow(unused_variables)]
294    fn compile(
295        &self,
296        wasm: &[u8],
297        compiler_config: CompilerConfig,
298        _: Token,
299    ) -> CompileResult<ModuleInner> {
300        if MCG::requires_pre_validation() {
301            validate_with_features(wasm, &compiler_config.features)?;
302        }
303
304        let mut mcg = match MCG::backend_id() {
305            "llvm" => MCG::new_with_target(
306                compiler_config.triple.clone(),
307                compiler_config.cpu_name.clone(),
308                compiler_config.cpu_features.clone(),
309            ),
310            _ => MCG::new(),
311        };
312        let mut chain = (self.middleware_chain_generator)();
313        let info = crate::parse::read_module(wasm, &mut mcg, &mut chain, &compiler_config)?;
314        let (exec_context, compile_debug_info, cache_gen) = mcg
315            .finalize(&info.read().unwrap())
316            .map_err(|x| CompileError::InternalError {
317                msg: format!("{:?}", x),
318            })?;
319
320        #[cfg(feature = "generate-debug-information")]
321        {
322            if compiler_config.should_generate_debug_info() {
323                if let Some(dbg_info) = compile_debug_info {
324                    let debug_info = wasm_debug::read_debuginfo(wasm);
325                    let extra_info = wasm_debug::types::ModuleVmctxInfo::new(
326                        crate::vm::Ctx::offset_memory_base() as _,
327                        std::mem::size_of::<crate::vm::Ctx>() as _,
328                        dbg_info.stack_slot_offsets.values(),
329                    );
330                    let compiled_fn_map =
331                        wasm_debug::types::create_module_address_map(dbg_info.func_info.values());
332                    let range_map =
333                        wasm_debug::types::build_values_ranges(dbg_info.inst_info.values());
334                    let raw_func_slice = &dbg_info.pointers;
335
336                    let debug_image = wasm_debug::emit_debugsections_image(
337                        target_lexicon::HOST,
338                        std::mem::size_of::<usize>() as u8,
339                        &debug_info,
340                        &extra_info,
341                        &compiled_fn_map,
342                        &range_map,
343                        raw_func_slice,
344                    )
345                    .expect("make debug image");
346
347                    let mut writer = info.write().unwrap();
348                    writer
349                        .debug_info_manager
350                        .register_new_jit_code_entry(&debug_image);
351                }
352            }
353        }
354
355        Ok(ModuleInner {
356            cache_gen,
357            runnable_module: Arc::new(Box::new(exec_context)),
358            info: Arc::try_unwrap(info).unwrap().into_inner().unwrap(),
359        })
360    }
361
362    unsafe fn from_cache(
363        &self,
364        artifact: Artifact,
365        token: Token,
366    ) -> Result<ModuleInner, CacheError> {
367        MCG::from_cache(artifact, token)
368    }
369}
370
371/// A sink for parse events.
372pub struct EventSink<'a, 'b> {
373    buffer: SmallVec<[Event<'a, 'b>; 2]>,
374}
375
376impl<'a, 'b> EventSink<'a, 'b> {
377    /// Push a new `Event` to this sink.
378    pub fn push(&mut self, ev: Event<'a, 'b>) {
379        self.buffer.push(ev);
380    }
381}
382
383/// A container for a chain of middlewares.
384pub struct MiddlewareChain {
385    chain: Vec<Box<dyn GenericFunctionMiddleware>>,
386}
387
388impl MiddlewareChain {
389    /// Create a new empty `MiddlewareChain`.
390    pub fn new() -> MiddlewareChain {
391        MiddlewareChain { chain: vec![] }
392    }
393
394    /// Push a new `FunctionMiddleware` to this `MiddlewareChain`.
395    pub fn push<M: FunctionMiddleware + 'static>(&mut self, m: M) {
396        self.chain.push(Box::new(m));
397    }
398
399    /// Run this chain with the provided function code generator, event and module info.
400    pub(crate) fn run<E: Debug, FCG: FunctionCodeGenerator<E>>(
401        &mut self,
402        fcg: Option<&mut FCG>,
403        ev: Event,
404        module_info: &ModuleInfo,
405        source_loc: u32,
406    ) -> Result<(), String> {
407        let mut sink = EventSink {
408            buffer: SmallVec::new(),
409        };
410        sink.push(ev);
411        for m in &mut self.chain {
412            let prev: SmallVec<[Event; 2]> = sink.buffer.drain(..).collect();
413            for ev in prev {
414                m.feed_event(ev, module_info, &mut sink, source_loc)?;
415            }
416        }
417        if let Some(fcg) = fcg {
418            for ev in sink.buffer {
419                fcg.feed_event(ev, module_info, source_loc)
420                    .map_err(|x| format!("{:?}", x))?;
421            }
422        }
423
424        Ok(())
425    }
426}
427
428/// A trait that represents the signature required to implement middleware for a function.
429pub trait FunctionMiddleware {
430    /// The error type for this middleware's functions.
431    type Error: Debug;
432    /// Processes the given event, module info and sink.
433    fn feed_event<'a, 'b: 'a>(
434        &mut self,
435        op: Event<'a, 'b>,
436        module_info: &ModuleInfo,
437        sink: &mut EventSink<'a, 'b>,
438        source_loc: u32,
439    ) -> Result<(), Self::Error>;
440}
441
442pub(crate) trait GenericFunctionMiddleware {
443    fn feed_event<'a, 'b: 'a>(
444        &mut self,
445        op: Event<'a, 'b>,
446        module_info: &ModuleInfo,
447        sink: &mut EventSink<'a, 'b>,
448        source_loc: u32,
449    ) -> Result<(), String>;
450}
451
452impl<E: Debug, T: FunctionMiddleware<Error = E>> GenericFunctionMiddleware for T {
453    fn feed_event<'a, 'b: 'a>(
454        &mut self,
455        op: Event<'a, 'b>,
456        module_info: &ModuleInfo,
457        sink: &mut EventSink<'a, 'b>,
458        source_loc: u32,
459    ) -> Result<(), String> {
460        <Self as FunctionMiddleware>::feed_event(self, op, module_info, sink, source_loc)
461            .map_err(|x| format!("{:?}", x))
462    }
463}
464
465/// The function-scope code generator trait.
466pub trait FunctionCodeGenerator<E: Debug> {
467    /// Sets the return type.
468    fn feed_return(&mut self, ty: WpType) -> Result<(), E>;
469
470    /// Adds a parameter to the function.
471    fn feed_param(&mut self, ty: WpType) -> Result<(), E>;
472
473    /// Adds `n` locals to the function.
474    fn feed_local(&mut self, ty: WpType, n: usize, loc: u32) -> Result<(), E>;
475
476    /// Called before the first call to `feed_opcode`.
477    fn begin_body(&mut self, module_info: &ModuleInfo) -> Result<(), E>;
478
479    /// Called for each operator.
480    fn feed_event(&mut self, op: Event, module_info: &ModuleInfo, source_loc: u32)
481        -> Result<(), E>;
482
483    /// Finalizes the function.
484    fn finalize(&mut self) -> Result<(), E>;
485}