wasmtime_wizer/lib.rs
1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22
23pub use crate::info::ModuleContext;
24pub use crate::snapshot::SnapshotVal;
25use anyhow::Context;
26use std::collections::{HashMap, HashSet};
27
/// Value `Wizer::get_keep_init_func` falls back to when the `keep_init_func`
/// option was never set.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
29
30/// Wizer: the WebAssembly pre-initializer!
31///
32/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
33/// Wizer instantiates your WebAssembly module, executes its initialization
34/// function, and then serializes the instance's initialized state out into a
35/// new WebAssembly module. Now you can use this new, pre-initialized
36/// WebAssembly module to hit the ground running, without making your users wait
37/// for that first-time set up code to complete.
38///
39/// ## Caveats
40///
41/// * The initialization function may not call any imported functions. Doing so
42/// will trigger a trap and `wizer` will exit.
43///
44/// * The Wasm module may not import globals, tables, or memories.
45///
46/// * Reference types are not supported yet. This is tricky because it would
47/// allow the Wasm module to mutate tables, and we would need to be able to
48/// snapshot the new table state, but funcrefs and externrefs don't have
49/// identity and aren't comparable in the Wasm spec, which makes snapshotting
50/// difficult.
// When the `clap` cargo feature is enabled this struct additionally derives
// `clap::Parser`, and each field's `arg(...)` attribute below describes the
// corresponding command-line flag.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    // Each entry is stored as a `(dst, src)` pair: that is the order
    // `parse_rename` produces and the order `FuncRenames::parse` destructures.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    // Three states, resolved by `get_keep_init_func`:
    //   * `None`          -- never set; falls back to `DEFAULT_KEEP_INIT_FUNC`.
    //   * `Some(None)`    -- set without a value; treated as `true`.
    //   * `Some(Some(b))` -- set explicitly to `b`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
99
/// Parses a single `dst=src` function-rename specification.
///
/// The input is split at the *first* `=`: the text before it becomes the new
/// export name (`dst`) and the text after it becomes the existing export name
/// (`src`). Any further `=` characters therefore end up in `src`.
///
/// # Errors
///
/// Returns an error if `s` does not contain any `=` character.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> anyhow::Result<(String, String)> {
    // `split_once` splits at the first separator without allocating an
    // intermediate `Vec`, and makes the "no separator" case explicit.
    let Some((dst, src)) = s.split_once('=') else {
        anyhow::bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
108
/// Lookup tables derived from a list of `(dst, src)` function renames.
///
/// Built by `FuncRenames::parse`, which rejects lists that repeat a
/// destination or a source name.
#[derive(Default)]
struct FuncRenames {
    /// For a given export name that we encounter in the original module, a map
    /// to a new name, if any, to emit in the output module.
    rename_src_to_dst: HashMap<String, String>,
    /// A set of export names that we ignore in the original module (because
    /// they are overwritten by renamings).
    rename_dsts: HashSet<String>,
}
118
119impl FuncRenames {
120 fn parse(renames: &[(String, String)]) -> anyhow::Result<FuncRenames> {
121 let mut ret = FuncRenames {
122 rename_src_to_dst: HashMap::new(),
123 rename_dsts: HashSet::new(),
124 };
125 if renames.is_empty() {
126 return Ok(ret);
127 }
128
129 for (dst, src) in renames {
130 if ret.rename_dsts.contains(dst) {
131 anyhow::bail!("Duplicated function rename dst {dst}");
132 }
133 if ret.rename_src_to_dst.contains_key(src) {
134 anyhow::bail!("Duplicated function rename src {src}");
135 }
136 ret.rename_dsts.insert(dst.clone());
137 ret.rename_src_to_dst.insert(src.clone(), dst.clone());
138 }
139
140 Ok(ret)
141 }
142}
143
144impl Wizer {
145 /// Construct a new `Wizer` builder.
146 pub fn new() -> Self {
147 Wizer {
148 init_func: "wizer-initialize".to_string(),
149 func_renames: vec![],
150 keep_init_func: None,
151 }
152 }
153
154 /// The export name of the initializer function.
155 ///
156 /// Defaults to `"wizer-initialize"`.
157 pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
158 self.init_func = init_func.into();
159 self
160 }
161
162 /// Returns the initialization function that will be run for wizer.
163 pub fn get_init_func(&self) -> &str {
164 &self.init_func
165 }
166
167 /// Add a function rename to perform.
168 pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
169 self.func_renames
170 .push((new_name.to_string(), old_name.to_string()));
171 self
172 }
173
174 /// After initialization, should the Wasm module still export the
175 /// initialization function?
176 ///
177 /// This is `false` by default, meaning that the initialization function is
178 /// no longer exported from the Wasm module.
179 pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
180 self.keep_init_func = Some(Some(keep));
181 self
182 }
183
184 /// First half of [`Self::run`] which instruments the provided `wasm` and
185 /// produces a new wasm module which should be run by a runtime.
186 ///
187 /// After the returned wasm is executed the context returned here and the
188 /// state of the instance should be passed to [`Self::snapshot`].
189 pub fn instrument<'a>(&self, wasm: &'a [u8]) -> anyhow::Result<(ModuleContext<'a>, Vec<u8>)> {
190 // Make sure we're given valid Wasm from the get go.
191 self.wasm_validate(&wasm)?;
192
193 let mut cx = parse::parse(wasm)?;
194
195 // When wizening core modules directly some imports aren't supported,
196 // so check for those here.
197 for import in cx.imports() {
198 match import.ty {
199 wasmparser::TypeRef::Global(_) => {
200 anyhow::bail!("imported globals are not supported")
201 }
202 wasmparser::TypeRef::Table(_) => {
203 anyhow::bail!("imported tables are not supported")
204 }
205 wasmparser::TypeRef::Memory(_) => {
206 anyhow::bail!("imported memories are not supported")
207 }
208 wasmparser::TypeRef::Func(_) => {}
209 wasmparser::TypeRef::FuncExact(_) => {}
210 wasmparser::TypeRef::Tag(_) => {}
211 }
212 }
213
214 let instrumented_wasm = instrument::instrument(&mut cx);
215 self.debug_assert_valid_wasm(&instrumented_wasm);
216
217 Ok((cx, instrumented_wasm))
218 }
219
220 /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
221 /// by [`Self::instrument`] and the state of the `instance` after it has
222 /// possibly executed its initialization function.
223 ///
224 /// This returns a new WebAssembly binary which has all state
225 /// pre-initialized.
226 pub async fn snapshot(
227 &self,
228 mut cx: ModuleContext<'_>,
229 instance: &mut impl InstanceState,
230 ) -> anyhow::Result<Vec<u8>> {
231 // Parse rename spec.
232 let renames = FuncRenames::parse(&self.func_renames)?;
233
234 let snapshot = snapshot::snapshot(&cx, instance).await;
235 let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames);
236
237 self.debug_assert_valid_wasm(&rewritten_wasm);
238
239 Ok(rewritten_wasm)
240 }
241
242 fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
243 if !cfg!(debug_assertions) {
244 return;
245 }
246 if let Err(error) = self.wasm_validate(&wasm) {
247 #[cfg(feature = "wasmprinter")]
248 let wat = wasmprinter::print_bytes(&wasm)
249 .unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
250 #[cfg(not(feature = "wasmprinter"))]
251 let wat = "`wasmprinter` cargo feature is not enabled".to_string();
252 panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
253 }
254 }
255
256 fn wasm_validate(&self, wasm: &[u8]) -> anyhow::Result<()> {
257 log::debug!("Validating input Wasm");
258
259 wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
260 .validate_all(wasm)
261 .context("wasm validation failed")?;
262
263 for payload in wasmparser::Parser::new(0).parse_all(wasm) {
264 match payload? {
265 wasmparser::Payload::CodeSectionEntry(code) => {
266 let mut ops = code.get_operators_reader()?;
267 while !ops.eof() {
268 match ops.read()? {
269 // Table mutations aren't allowed as wizer has no
270 // way to record a snapshot of a table at this time.
271 // The only table mutations allowed are those from
272 // active element segments which can be
273 // deterministically replayed, so disallow all other
274 // forms of mutating a table.
275 //
276 // Ideally Wizer could take a snapshot of a table
277 // post-instantiation and then ensure that after
278 // running initialization the table didn't get
279 // mutated, allowing these instructions, but that's
280 // also not possible at this time.
281 wasmparser::Operator::TableCopy { .. } => {
282 anyhow::bail!("unsupported `table.copy` instruction")
283 }
284 wasmparser::Operator::TableInit { .. } => {
285 anyhow::bail!("unsupported `table.init` instruction")
286 }
287 wasmparser::Operator::TableSet { .. } => {
288 anyhow::bail!("unsupported `table.set` instruction")
289 }
290 wasmparser::Operator::TableGrow { .. } => {
291 anyhow::bail!("unsupported `table.grow` instruction")
292 }
293 wasmparser::Operator::TableFill { .. } => {
294 anyhow::bail!("unsupported `table.fill` instruction")
295 }
296
297 // Wizer has no way of dynamically determining which
298 // element or data segments were dropped during
299 // execution so instead disallow these instructions
300 // entirely. Like above it'd be nice to allow them
301 // but just forbid their execution during the
302 // initialization function, but that can't be done
303 // easily at this time.
304 wasmparser::Operator::ElemDrop { .. } => {
305 anyhow::bail!("unsupported `elem.drop` instruction")
306 }
307 wasmparser::Operator::DataDrop { .. } => {
308 anyhow::bail!("unsupported `data.drop` instruction")
309 }
310
311 // Wizer can't snapshot GC references, so disallow
312 // any mutation of GC references. This prevents, for
313 // example, reading something from a table and then
314 // mutating it.
315 wasmparser::Operator::StructSet { .. } => {
316 anyhow::bail!("unsupported `struct.set` instruction")
317 }
318 wasmparser::Operator::ArraySet { .. } => {
319 anyhow::bail!("unsupported `array.set` instruction")
320 }
321 wasmparser::Operator::ArrayFill { .. } => {
322 anyhow::bail!("unsupported `array.fill` instruction")
323 }
324 wasmparser::Operator::ArrayCopy { .. } => {
325 anyhow::bail!("unsupported `array.copy` instruction")
326 }
327 wasmparser::Operator::ArrayInitData { .. } => {
328 anyhow::bail!("unsupported `array.init_data` instruction")
329 }
330 wasmparser::Operator::ArrayInitElem { .. } => {
331 anyhow::bail!("unsupported `array.init_elem` instruction")
332 }
333
334 _ => continue,
335 }
336 }
337 }
338 wasmparser::Payload::GlobalSection(globals) => {
339 for g in globals {
340 let g = g?.ty;
341 if !g.mutable {
342 continue;
343 }
344 match g.content_type {
345 wasmparser::ValType::I32
346 | wasmparser::ValType::I64
347 | wasmparser::ValType::F32
348 | wasmparser::ValType::F64
349 | wasmparser::ValType::V128 => {}
350 wasmparser::ValType::Ref(_) => {
351 anyhow::bail!(
352 "unsupported mutable global containing a reference type"
353 )
354 }
355 }
356 }
357 }
358 _ => {}
359 }
360 }
361
362 Ok(())
363 }
364
365 fn get_keep_init_func(&self) -> bool {
366 match self.keep_init_func {
367 Some(keep) => keep.unwrap_or(true),
368 None => DEFAULT_KEEP_INIT_FUNC,
369 }
370 }
371}
372
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
///
/// Both methods return futures that are required to be `Send`.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    fn global_get(&mut self, name: &str) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name`, passing the
    /// entire contents as a byte slice to the `contents` callback.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}