wasmtime_wizer/
lib.rs

1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22
23pub use crate::info::ModuleContext;
24pub use crate::snapshot::SnapshotVal;
25use anyhow::Context;
26use std::collections::{HashMap, HashSet};
27
/// Value used for the "keep the initialization function exported" setting
/// when the user never specified `keep_init_func` at all (see
/// `Wizer::get_keep_init_func`).
const DEFAULT_KEEP_INIT_FUNC: bool = false;
29
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
///   will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
///   allow the Wasm module to mutate tables, and we would need to be able to
///   snapshot the new table state, but funcrefs and externrefs don't have
///   identity and aren't comparable in the Wasm spec, which makes snapshotting
///   difficult.
//
// NOTE(review): with the `clap` feature enabled this struct derives
// `clap::Parser`. Per clap's derive documentation the `///` comments on the
// struct and on its fields are then also used as command-line help text, so
// treat edits to them as user-visible changes — confirm against the clap
// version in use.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    //
    // The clap default below is the same string that `Wizer::new` uses; keep
    // the two in sync.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    //
    // Each entry is a `(dst, src)` pair: that is the order produced by
    // `parse_rename`, pushed by `Wizer::func_rename`, and destructured by
    // `FuncRenames::parse`.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    //
    // Three states are distinguished (see `Wizer::get_keep_init_func`):
    //   * `None`          - never specified; `DEFAULT_KEEP_INIT_FUNC` applies.
    //   * `Some(None)`    - specified without a value; treated as `true`.
    //                       (Presumably what clap produces for a bare
    //                       `--keep-init-func` — TODO confirm.)
    //   * `Some(Some(b))` - explicitly set to `b`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
99
/// Parses one `dst=src` rename specification given on the command line.
///
/// The input is split at the first `=`: the text before it becomes the
/// destination name and the text after it (including any further `=`
/// characters) becomes the source name. Input without any `=` is rejected.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> anyhow::Result<(String, String)> {
    match s.split_once('=') {
        Some((dst, src)) => Ok((dst.to_owned(), src.to_owned())),
        None => anyhow::bail!("must contain exactly one equals character ('=')"),
    }
}
108
109#[derive(Default)]
110struct FuncRenames {
111    /// For a given export name that we encounter in the original module, a map
112    /// to a new name, if any, to emit in the output module.
113    rename_src_to_dst: HashMap<String, String>,
114    /// A set of export names that we ignore in the original module (because
115    /// they are overwritten by renamings).
116    rename_dsts: HashSet<String>,
117}
118
119impl FuncRenames {
120    fn parse(renames: &[(String, String)]) -> anyhow::Result<FuncRenames> {
121        let mut ret = FuncRenames {
122            rename_src_to_dst: HashMap::new(),
123            rename_dsts: HashSet::new(),
124        };
125        if renames.is_empty() {
126            return Ok(ret);
127        }
128
129        for (dst, src) in renames {
130            if ret.rename_dsts.contains(dst) {
131                anyhow::bail!("Duplicated function rename dst {dst}");
132            }
133            if ret.rename_src_to_dst.contains_key(src) {
134                anyhow::bail!("Duplicated function rename src {src}");
135            }
136            ret.rename_dsts.insert(dst.clone());
137            ret.rename_src_to_dst.insert(src.clone(), dst.clone());
138        }
139
140        Ok(ret)
141    }
142}
143
144impl Wizer {
145    /// Construct a new `Wizer` builder.
146    pub fn new() -> Self {
147        Wizer {
148            init_func: "wizer-initialize".to_string(),
149            func_renames: vec![],
150            keep_init_func: None,
151        }
152    }
153
154    /// The export name of the initializer function.
155    ///
156    /// Defaults to `"wizer-initialize"`.
157    pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
158        self.init_func = init_func.into();
159        self
160    }
161
162    /// Returns the initialization function that will be run for wizer.
163    pub fn get_init_func(&self) -> &str {
164        &self.init_func
165    }
166
167    /// Add a function rename to perform.
168    pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
169        self.func_renames
170            .push((new_name.to_string(), old_name.to_string()));
171        self
172    }
173
174    /// After initialization, should the Wasm module still export the
175    /// initialization function?
176    ///
177    /// This is `false` by default, meaning that the initialization function is
178    /// no longer exported from the Wasm module.
179    pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
180        self.keep_init_func = Some(Some(keep));
181        self
182    }
183
184    /// First half of [`Self::run`] which instruments the provided `wasm` and
185    /// produces a new wasm module which should be run by a runtime.
186    ///
187    /// After the returned wasm is executed the context returned here and the
188    /// state of the instance should be passed to [`Self::snapshot`].
189    pub fn instrument<'a>(&self, wasm: &'a [u8]) -> anyhow::Result<(ModuleContext<'a>, Vec<u8>)> {
190        // Make sure we're given valid Wasm from the get go.
191        self.wasm_validate(&wasm)?;
192
193        let mut cx = parse::parse(wasm)?;
194
195        // When wizening core modules directly some imports aren't supported,
196        // so check for those here.
197        for import in cx.imports() {
198            match import.ty {
199                wasmparser::TypeRef::Global(_) => {
200                    anyhow::bail!("imported globals are not supported")
201                }
202                wasmparser::TypeRef::Table(_) => {
203                    anyhow::bail!("imported tables are not supported")
204                }
205                wasmparser::TypeRef::Memory(_) => {
206                    anyhow::bail!("imported memories are not supported")
207                }
208                wasmparser::TypeRef::Func(_) => {}
209                wasmparser::TypeRef::Tag(_) => {}
210            }
211        }
212
213        let instrumented_wasm = instrument::instrument(&mut cx);
214        self.debug_assert_valid_wasm(&instrumented_wasm);
215
216        Ok((cx, instrumented_wasm))
217    }
218
219    /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
220    /// by [`Self::instrument`] and the state of the `instance` after it has
221    /// possibly executed its initialization function.
222    ///
223    /// This returns a new WebAssembly binary which has all state
224    /// pre-initialized.
225    pub async fn snapshot(
226        &self,
227        mut cx: ModuleContext<'_>,
228        instance: &mut impl InstanceState,
229    ) -> anyhow::Result<Vec<u8>> {
230        // Parse rename spec.
231        let renames = FuncRenames::parse(&self.func_renames)?;
232
233        let snapshot = snapshot::snapshot(&cx, instance).await;
234        let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames);
235
236        self.debug_assert_valid_wasm(&rewritten_wasm);
237
238        Ok(rewritten_wasm)
239    }
240
241    fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
242        if !cfg!(debug_assertions) {
243            return;
244        }
245        if let Err(error) = self.wasm_validate(&wasm) {
246            #[cfg(feature = "wasmprinter")]
247            let wat = wasmprinter::print_bytes(&wasm)
248                .unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
249            #[cfg(not(feature = "wasmprinter"))]
250            let wat = "`wasmprinter` cargo feature is not enabled".to_string();
251            panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
252        }
253    }
254
255    fn wasm_validate(&self, wasm: &[u8]) -> anyhow::Result<()> {
256        log::debug!("Validating input Wasm");
257
258        wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
259            .validate_all(wasm)
260            .context("wasm validation failed")?;
261
262        for payload in wasmparser::Parser::new(0).parse_all(wasm) {
263            match payload? {
264                wasmparser::Payload::CodeSectionEntry(code) => {
265                    let mut ops = code.get_operators_reader()?;
266                    while !ops.eof() {
267                        match ops.read()? {
268                            // Table mutations aren't allowed as wizer has no
269                            // way to record a snapshot of a table at this time.
270                            // The only table mutations allowed are those from
271                            // active element segments which can be
272                            // deterministically replayed, so disallow all other
273                            // forms of mutating a table.
274                            //
275                            // Ideally Wizer could take a snapshot of a table
276                            // post-instantiation and then ensure that after
277                            // running initialization the table didn't get
278                            // mutated, allowing these instructions, but that's
279                            // also not possible at this time.
280                            wasmparser::Operator::TableCopy { .. } => {
281                                anyhow::bail!("unsupported `table.copy` instruction")
282                            }
283                            wasmparser::Operator::TableInit { .. } => {
284                                anyhow::bail!("unsupported `table.init` instruction")
285                            }
286                            wasmparser::Operator::TableSet { .. } => {
287                                anyhow::bail!("unsupported `table.set` instruction")
288                            }
289                            wasmparser::Operator::TableGrow { .. } => {
290                                anyhow::bail!("unsupported `table.grow` instruction")
291                            }
292                            wasmparser::Operator::TableFill { .. } => {
293                                anyhow::bail!("unsupported `table.fill` instruction")
294                            }
295
296                            // Wizer has no way of dynamically determining which
297                            // element or data segments were dropped during
298                            // execution so instead disallow these instructions
299                            // entirely. Like above it'd be nice to allow them
300                            // but just forbid their execution during the
301                            // initialization function, but that can't be done
302                            // easily at this time.
303                            wasmparser::Operator::ElemDrop { .. } => {
304                                anyhow::bail!("unsupported `elem.drop` instruction")
305                            }
306                            wasmparser::Operator::DataDrop { .. } => {
307                                anyhow::bail!("unsupported `data.drop` instruction")
308                            }
309
310                            // Wizer can't snapshot GC references, so disallow
311                            // any mutation of GC references. This prevents, for
312                            // example, reading something from a table and then
313                            // mutating it.
314                            wasmparser::Operator::StructSet { .. } => {
315                                anyhow::bail!("unsupported `struct.set` instruction")
316                            }
317                            wasmparser::Operator::ArraySet { .. } => {
318                                anyhow::bail!("unsupported `array.set` instruction")
319                            }
320                            wasmparser::Operator::ArrayFill { .. } => {
321                                anyhow::bail!("unsupported `array.fill` instruction")
322                            }
323                            wasmparser::Operator::ArrayCopy { .. } => {
324                                anyhow::bail!("unsupported `array.copy` instruction")
325                            }
326                            wasmparser::Operator::ArrayInitData { .. } => {
327                                anyhow::bail!("unsupported `array.init_data` instruction")
328                            }
329                            wasmparser::Operator::ArrayInitElem { .. } => {
330                                anyhow::bail!("unsupported `array.init_elem` instruction")
331                            }
332
333                            _ => continue,
334                        }
335                    }
336                }
337                wasmparser::Payload::GlobalSection(globals) => {
338                    for g in globals {
339                        let g = g?.ty;
340                        if !g.mutable {
341                            continue;
342                        }
343                        match g.content_type {
344                            wasmparser::ValType::I32
345                            | wasmparser::ValType::I64
346                            | wasmparser::ValType::F32
347                            | wasmparser::ValType::F64
348                            | wasmparser::ValType::V128 => {}
349                            wasmparser::ValType::Ref(_) => {
350                                anyhow::bail!(
351                                    "unsupported mutable global containing a reference type"
352                                )
353                            }
354                        }
355                    }
356                }
357                _ => {}
358            }
359        }
360
361        Ok(())
362    }
363
364    fn get_keep_init_func(&self) -> bool {
365        match self.keep_init_func {
366            Some(keep) => keep.unwrap_or(true),
367            None => DEFAULT_KEEP_INIT_FUNC,
368        }
369    }
370}
371
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    fn global_get(&mut self, name: &str) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name` and passes the
    /// entire contents, as a byte slice, to the `contents` callback.
    ///
    /// The returned future itself yields no value; the bytes are only
    /// available inside `contents`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}
392        contents: impl FnOnce(&[u8]) + Send,
393    ) -> impl Future<Output = ()> + Send;
394}