wasmtime_wizer/lib.rs
1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22#[cfg(not(feature = "rayon"))]
23mod rayoff;
24
25pub use crate::info::ModuleContext;
26pub use crate::snapshot::SnapshotVal;
27use ::wasmtime::{Result, bail, error::Context as _};
28use std::collections::{HashMap, HashSet};
29pub use wasmparser::ValType;
30
/// Value returned by `Wizer::get_keep_init_func` when the `keep_init_func`
/// option was never configured (i.e. the field is still `None`).
const DEFAULT_KEEP_INIT_FUNC: bool = false;
32
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
///   will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
///   allow the Wasm module to mutate tables, and we would need to be able to
///   snapshot the new table state, but funcrefs and externrefs don't have
///   identity and aren't comparable in the Wasm spec, which makes snapshotting
///   difficult.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    // NOTE: the `default_value` below duplicates the literal used by
    // `Wizer::new`; keep the two in sync.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    // Each entry is stored as `(dst, src)`: `parse_rename` yields the text
    // before the `=` first, `Wizer::func_rename` pushes `(new_name, old_name)`,
    // and `FuncRenames::parse` destructures the pairs in that same order.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    // Three states, as interpreted by `get_keep_init_func`:
    //   * `None`          - never configured; `DEFAULT_KEEP_INIT_FUNC` applies.
    //   * `Some(None)`    - configured without an explicit value; treated as
    //                       `true` (presumably the bare command-line flag --
    //                       confirm against clap's `Option<Option<T>>` handling).
    //   * `Some(Some(b))` - explicit value `b`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
102
/// Parses one `dst=src` function-renaming specification from the command
/// line into a `(dst, src)` pair.
///
/// The input is split at the first `=` only, so everything after it
/// (including any further `=` characters) ends up in the second element.
///
/// # Errors
///
/// Returns an error if `s` does not contain an `=` at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
    // `split_once` hands back both halves directly, so there is no
    // intermediate `Vec` to allocate and no indexing that could panic.
    let Some((dst, src)) = s.split_once('=') else {
        bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
111
112#[derive(Default)]
113struct FuncRenames {
114 /// For a given export name that we encounter in the original module, a map
115 /// to a new name, if any, to emit in the output module.
116 rename_src_to_dst: HashMap<String, String>,
117 /// A set of export names that we ignore in the original module (because
118 /// they are overwritten by renamings).
119 rename_dsts: HashSet<String>,
120}
121
122impl FuncRenames {
123 fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
124 let mut ret = FuncRenames {
125 rename_src_to_dst: HashMap::new(),
126 rename_dsts: HashSet::new(),
127 };
128 if renames.is_empty() {
129 return Ok(ret);
130 }
131
132 for (dst, src) in renames {
133 if ret.rename_dsts.contains(dst) {
134 bail!("Duplicated function rename dst {dst}");
135 }
136 if ret.rename_src_to_dst.contains_key(src) {
137 bail!("Duplicated function rename src {src}");
138 }
139 ret.rename_dsts.insert(dst.clone());
140 ret.rename_src_to_dst.insert(src.clone(), dst.clone());
141 }
142
143 Ok(ret)
144 }
145}
146
147impl Wizer {
148 /// Construct a new `Wizer` builder.
149 pub fn new() -> Self {
150 Wizer {
151 init_func: "wizer-initialize".to_string(),
152 func_renames: vec![],
153 keep_init_func: None,
154 }
155 }
156
157 /// The export name of the initializer function.
158 ///
159 /// Defaults to `"wizer-initialize"`.
160 pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
161 self.init_func = init_func.into();
162 self
163 }
164
165 /// Returns the initialization function that will be run for wizer.
166 pub fn get_init_func(&self) -> &str {
167 &self.init_func
168 }
169
170 /// Add a function rename to perform.
171 pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
172 self.func_renames
173 .push((new_name.to_string(), old_name.to_string()));
174 self
175 }
176
177 /// After initialization, should the Wasm module still export the
178 /// initialization function?
179 ///
180 /// This is `false` by default, meaning that the initialization function is
181 /// no longer exported from the Wasm module.
182 pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
183 self.keep_init_func = Some(Some(keep));
184 self
185 }
186
187 /// First half of [`Self::run`] which instruments the provided `wasm` and
188 /// produces a new wasm module which should be run by a runtime.
189 ///
190 /// After the returned wasm is executed the context returned here and the
191 /// state of the instance should be passed to [`Self::snapshot`].
192 pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
193 // Make sure we're given valid Wasm from the get go.
194 self.wasm_validate(&wasm)?;
195
196 let mut cx = parse::parse(wasm)?;
197
198 // When wizening core modules directly some imports aren't supported,
199 // so check for those here.
200 for import in cx.imports() {
201 match import.ty {
202 wasmparser::TypeRef::Global(_) => {
203 bail!("imported globals are not supported")
204 }
205 wasmparser::TypeRef::Table(_) => {
206 bail!("imported tables are not supported")
207 }
208 wasmparser::TypeRef::Memory(_) => {
209 bail!("imported memories are not supported")
210 }
211 wasmparser::TypeRef::Func(_) => {}
212 wasmparser::TypeRef::FuncExact(_) => {}
213 wasmparser::TypeRef::Tag(_) => {}
214 }
215 }
216
217 let instrumented_wasm = instrument::instrument(&mut cx);
218 self.debug_assert_valid_wasm(&instrumented_wasm);
219
220 Ok((cx, instrumented_wasm))
221 }
222
223 /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
224 /// by [`Self::instrument`] and the state of the `instance` after it has
225 /// possibly executed its initialization function.
226 ///
227 /// This returns a new WebAssembly binary which has all state
228 /// pre-initialized.
229 pub async fn snapshot(
230 &self,
231 mut cx: ModuleContext<'_>,
232 instance: &mut impl InstanceState,
233 ) -> Result<Vec<u8>> {
234 // Parse rename spec.
235 let renames = FuncRenames::parse(&self.func_renames)?;
236
237 let snapshot = snapshot::snapshot(&cx, instance).await;
238 let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames, true);
239
240 self.debug_assert_valid_wasm(&rewritten_wasm);
241
242 Ok(rewritten_wasm)
243 }
244
245 fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
246 if !cfg!(debug_assertions) {
247 return;
248 }
249 if let Err(error) = self.wasm_validate(&wasm) {
250 #[cfg(feature = "wasmprinter")]
251 let wat = wasmprinter::print_bytes(&wasm)
252 .unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
253 #[cfg(not(feature = "wasmprinter"))]
254 let wat = "`wasmprinter` cargo feature is not enabled".to_string();
255 panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
256 }
257 }
258
259 fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
260 log::debug!("Validating input Wasm");
261
262 wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
263 .validate_all(wasm)
264 .context("wasm validation failed")?;
265
266 for payload in wasmparser::Parser::new(0).parse_all(wasm) {
267 match payload? {
268 wasmparser::Payload::CodeSectionEntry(code) => {
269 let mut ops = code.get_operators_reader()?;
270 while !ops.eof() {
271 match ops.read()? {
272 // Table mutations aren't allowed as wizer has no
273 // way to record a snapshot of a table at this time.
274 // The only table mutations allowed are those from
275 // active element segments which can be
276 // deterministically replayed, so disallow all other
277 // forms of mutating a table.
278 //
279 // Ideally Wizer could take a snapshot of a table
280 // post-instantiation and then ensure that after
281 // running initialization the table didn't get
282 // mutated, allowing these instructions, but that's
283 // also not possible at this time.
284 wasmparser::Operator::TableCopy { .. } => {
285 bail!("unsupported `table.copy` instruction")
286 }
287 wasmparser::Operator::TableInit { .. } => {
288 bail!("unsupported `table.init` instruction")
289 }
290 wasmparser::Operator::TableSet { .. } => {
291 bail!("unsupported `table.set` instruction")
292 }
293 wasmparser::Operator::TableGrow { .. } => {
294 bail!("unsupported `table.grow` instruction")
295 }
296 wasmparser::Operator::TableFill { .. } => {
297 bail!("unsupported `table.fill` instruction")
298 }
299
300 // Wizer has no way of dynamically determining which
301 // element or data segments were dropped during
302 // execution so instead disallow these instructions
303 // entirely. Like above it'd be nice to allow them
304 // but just forbid their execution during the
305 // initialization function, but that can't be done
306 // easily at this time.
307 wasmparser::Operator::ElemDrop { .. } => {
308 bail!("unsupported `elem.drop` instruction")
309 }
310 wasmparser::Operator::DataDrop { .. } => {
311 bail!("unsupported `data.drop` instruction")
312 }
313
314 // Wizer can't snapshot GC references, so disallow
315 // any mutation of GC references. This prevents, for
316 // example, reading something from a table and then
317 // mutating it.
318 wasmparser::Operator::StructSet { .. } => {
319 bail!("unsupported `struct.set` instruction")
320 }
321 wasmparser::Operator::ArraySet { .. } => {
322 bail!("unsupported `array.set` instruction")
323 }
324 wasmparser::Operator::ArrayFill { .. } => {
325 bail!("unsupported `array.fill` instruction")
326 }
327 wasmparser::Operator::ArrayCopy { .. } => {
328 bail!("unsupported `array.copy` instruction")
329 }
330 wasmparser::Operator::ArrayInitData { .. } => {
331 bail!("unsupported `array.init_data` instruction")
332 }
333 wasmparser::Operator::ArrayInitElem { .. } => {
334 bail!("unsupported `array.init_elem` instruction")
335 }
336
337 _ => continue,
338 }
339 }
340 }
341 wasmparser::Payload::GlobalSection(globals) => {
342 for g in globals {
343 let g = g?.ty;
344 if !g.mutable {
345 continue;
346 }
347 match g.content_type {
348 wasmparser::ValType::I32
349 | wasmparser::ValType::I64
350 | wasmparser::ValType::F32
351 | wasmparser::ValType::F64
352 | wasmparser::ValType::V128 => {}
353 wasmparser::ValType::Ref(_) => {
354 bail!("unsupported mutable global containing a reference type")
355 }
356 }
357 }
358 }
359 _ => {}
360 }
361 }
362
363 Ok(())
364 }
365
366 fn get_keep_init_func(&self) -> bool {
367 match self.keep_init_func {
368 Some(keep) => keep.unwrap_or(true),
369 None => DEFAULT_KEEP_INIT_FUNC,
370 }
371 }
372}
373
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
///
/// Both methods take `&mut self` and return a `Send` future, so implementors
/// may perform the lookup asynchronously.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    // NOTE(review): `type_hint` is presumably the declared value type of the
    // global being read -- confirm against the callers in `snapshot`.
    fn global_get(
        &mut self,
        name: &str,
        type_hint: ValType,
    ) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name` and passes the
    /// entire contents, as a byte slice, to the `contents` callback.
    ///
    /// The returned future itself resolves to `()`; the bytes are only
    /// available inside the callback.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}