wasmtime_wizer/lib.rs
1//! Wizer: the WebAssembly pre-initializer!
2//!
3//! See the [`Wizer`] struct for details.
4
5#![deny(missing_docs)]
6#![cfg_attr(docsrs, feature(doc_cfg))]
7
8mod info;
9mod instrument;
10mod parse;
11mod rewrite;
12mod snapshot;
13
14#[cfg(feature = "wasmtime")]
15mod wasmtime;
16#[cfg(feature = "wasmtime")]
17pub use wasmtime::*;
18#[cfg(feature = "component-model")]
19mod component;
20#[cfg(feature = "component-model")]
21pub use component::*;
22
23pub use crate::info::ModuleContext;
24pub use crate::snapshot::SnapshotVal;
25use anyhow::Context;
26use std::collections::{HashMap, HashSet};
27
/// Fallback used by `Wizer::get_keep_init_func` when the `keep_init_func`
/// option was never set, either on the command line or through the builder.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
29
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
/// will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
/// allow the Wasm module to mutate tables, and we would need to be able to
/// snapshot the new table state, but funcrefs and externrefs don't have
/// identity and aren't comparable in the Wasm spec, which makes snapshotting
/// difficult.
// NOTE: when the `clap` feature is enabled the `///` comments on this struct
// and its fields double as command-line help text, so maintainer-only notes
// below are written as plain `//` comments.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    // The clap default below must stay in sync with the string literal used
    // by `Wizer::new`.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    // Each tuple is stored as `(dst, src)`, i.e. (new name, old name). This is
    // the order produced by `parse_rename` and by `Wizer::func_rename`, and
    // the order `FuncRenames::parse` destructures.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    // Tri-state, as interpreted by `Wizer::get_keep_init_func`:
    //   * `None`          -> option never set; `DEFAULT_KEEP_INIT_FUNC` applies.
    //   * `Some(None)`    -> option present without a value; treated as `true`.
    //   * `Some(Some(v))` -> explicit value `v`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
99
/// Parses a single `dst=src` rename specification from the command line.
///
/// The string is split at the *first* `=`: everything before it becomes the
/// destination name and everything after it (including any further `=`
/// characters) becomes the source name. The result is returned in
/// `(dst, src)` order.
///
/// # Errors
///
/// Returns an error when `s` contains no `=` at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> anyhow::Result<(String, String)> {
    // `split_once` expresses "split at the first separator" directly, without
    // collecting into a temporary `Vec` or indexing into it.
    let Some((dst, src)) = s.split_once('=') else {
        anyhow::bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
108
/// Lookup tables derived from the user-supplied `(dst, src)` rename pairs.
///
/// Built by `FuncRenames::parse`; the `Default` value has both tables empty,
/// meaning no renames are requested.
#[derive(Default)]
struct FuncRenames {
    /// For a given export name that we encounter in the original module, a map
    /// to a new name, if any, to emit in the output module.
    rename_src_to_dst: HashMap<String, String>,
    /// A set of export names that we ignore in the original module (because
    /// they are overwritten by renamings).
    rename_dsts: HashSet<String>,
}
118
119impl FuncRenames {
120 fn parse(renames: &[(String, String)]) -> anyhow::Result<FuncRenames> {
121 let mut ret = FuncRenames {
122 rename_src_to_dst: HashMap::new(),
123 rename_dsts: HashSet::new(),
124 };
125 if renames.is_empty() {
126 return Ok(ret);
127 }
128
129 for (dst, src) in renames {
130 if ret.rename_dsts.contains(dst) {
131 anyhow::bail!("Duplicated function rename dst {dst}");
132 }
133 if ret.rename_src_to_dst.contains_key(src) {
134 anyhow::bail!("Duplicated function rename src {src}");
135 }
136 ret.rename_dsts.insert(dst.clone());
137 ret.rename_src_to_dst.insert(src.clone(), dst.clone());
138 }
139
140 Ok(ret)
141 }
142}
143
144impl Wizer {
145 /// Construct a new `Wizer` builder.
146 pub fn new() -> Self {
147 Wizer {
148 init_func: "wizer-initialize".to_string(),
149 func_renames: vec![],
150 keep_init_func: None,
151 }
152 }
153
154 /// The export name of the initializer function.
155 ///
156 /// Defaults to `"wizer-initialize"`.
157 pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
158 self.init_func = init_func.into();
159 self
160 }
161
162 /// Returns the initialization function that will be run for wizer.
163 pub fn get_init_func(&self) -> &str {
164 &self.init_func
165 }
166
167 /// Add a function rename to perform.
168 pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
169 self.func_renames
170 .push((new_name.to_string(), old_name.to_string()));
171 self
172 }
173
174 /// After initialization, should the Wasm module still export the
175 /// initialization function?
176 ///
177 /// This is `false` by default, meaning that the initialization function is
178 /// no longer exported from the Wasm module.
179 pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
180 self.keep_init_func = Some(Some(keep));
181 self
182 }
183
184 /// First half of [`Self::run`] which instruments the provided `wasm` and
185 /// produces a new wasm module which should be run by a runtime.
186 ///
187 /// After the returned wasm is executed the context returned here and the
188 /// state of the instance should be passed to [`Self::snapshot`].
189 pub fn instrument<'a>(&self, wasm: &'a [u8]) -> anyhow::Result<(ModuleContext<'a>, Vec<u8>)> {
190 // Make sure we're given valid Wasm from the get go.
191 self.wasm_validate(&wasm)?;
192
193 let mut cx = parse::parse(wasm)?;
194
195 // When wizening core modules directly some imports aren't supported,
196 // so check for those here.
197 for import in cx.imports() {
198 match import.ty {
199 wasmparser::TypeRef::Global(_) => {
200 anyhow::bail!("imported globals are not supported")
201 }
202 wasmparser::TypeRef::Table(_) => {
203 anyhow::bail!("imported tables are not supported")
204 }
205 wasmparser::TypeRef::Memory(_) => {
206 anyhow::bail!("imported memories are not supported")
207 }
208 wasmparser::TypeRef::Func(_) => {}
209 wasmparser::TypeRef::Tag(_) => {}
210 }
211 }
212
213 let instrumented_wasm = instrument::instrument(&mut cx);
214 self.debug_assert_valid_wasm(&instrumented_wasm);
215
216 Ok((cx, instrumented_wasm))
217 }
218
219 /// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
220 /// by [`Self::instrument`] and the state of the `instance` after it has
221 /// possibly executed its initialization function.
222 ///
223 /// This returns a new WebAssembly binary which has all state
224 /// pre-initialized.
225 pub async fn snapshot(
226 &self,
227 mut cx: ModuleContext<'_>,
228 instance: &mut impl InstanceState,
229 ) -> anyhow::Result<Vec<u8>> {
230 // Parse rename spec.
231 let renames = FuncRenames::parse(&self.func_renames)?;
232
233 let snapshot = snapshot::snapshot(&cx, instance).await;
234 let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames);
235
236 self.debug_assert_valid_wasm(&rewritten_wasm);
237
238 Ok(rewritten_wasm)
239 }
240
241 fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
242 if !cfg!(debug_assertions) {
243 return;
244 }
245 if let Err(error) = self.wasm_validate(&wasm) {
246 #[cfg(feature = "wasmprinter")]
247 let wat = wasmprinter::print_bytes(&wasm)
248 .unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
249 #[cfg(not(feature = "wasmprinter"))]
250 let wat = "`wasmprinter` cargo feature is not enabled".to_string();
251 panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
252 }
253 }
254
255 fn wasm_validate(&self, wasm: &[u8]) -> anyhow::Result<()> {
256 log::debug!("Validating input Wasm");
257
258 wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
259 .validate_all(wasm)
260 .context("wasm validation failed")?;
261
262 for payload in wasmparser::Parser::new(0).parse_all(wasm) {
263 match payload? {
264 wasmparser::Payload::CodeSectionEntry(code) => {
265 let mut ops = code.get_operators_reader()?;
266 while !ops.eof() {
267 match ops.read()? {
268 // Table mutations aren't allowed as wizer has no
269 // way to record a snapshot of a table at this time.
270 // The only table mutations allowed are those from
271 // active element segments which can be
272 // deterministically replayed, so disallow all other
273 // forms of mutating a table.
274 //
275 // Ideally Wizer could take a snapshot of a table
276 // post-instantiation and then ensure that after
277 // running initialization the table didn't get
278 // mutated, allowing these instructions, but that's
279 // also not possible at this time.
280 wasmparser::Operator::TableCopy { .. } => {
281 anyhow::bail!("unsupported `table.copy` instruction")
282 }
283 wasmparser::Operator::TableInit { .. } => {
284 anyhow::bail!("unsupported `table.init` instruction")
285 }
286 wasmparser::Operator::TableSet { .. } => {
287 anyhow::bail!("unsupported `table.set` instruction")
288 }
289 wasmparser::Operator::TableGrow { .. } => {
290 anyhow::bail!("unsupported `table.grow` instruction")
291 }
292 wasmparser::Operator::TableFill { .. } => {
293 anyhow::bail!("unsupported `table.fill` instruction")
294 }
295
296 // Wizer has no way of dynamically determining which
297 // element or data segments were dropped during
298 // execution so instead disallow these instructions
299 // entirely. Like above it'd be nice to allow them
300 // but just forbid their execution during the
301 // initialization function, but that can't be done
302 // easily at this time.
303 wasmparser::Operator::ElemDrop { .. } => {
304 anyhow::bail!("unsupported `elem.drop` instruction")
305 }
306 wasmparser::Operator::DataDrop { .. } => {
307 anyhow::bail!("unsupported `data.drop` instruction")
308 }
309
310 // Wizer can't snapshot GC references, so disallow
311 // any mutation of GC references. This prevents, for
312 // example, reading something from a table and then
313 // mutating it.
314 wasmparser::Operator::StructSet { .. } => {
315 anyhow::bail!("unsupported `struct.set` instruction")
316 }
317 wasmparser::Operator::ArraySet { .. } => {
318 anyhow::bail!("unsupported `array.set` instruction")
319 }
320 wasmparser::Operator::ArrayFill { .. } => {
321 anyhow::bail!("unsupported `array.fill` instruction")
322 }
323 wasmparser::Operator::ArrayCopy { .. } => {
324 anyhow::bail!("unsupported `array.copy` instruction")
325 }
326 wasmparser::Operator::ArrayInitData { .. } => {
327 anyhow::bail!("unsupported `array.init_data` instruction")
328 }
329 wasmparser::Operator::ArrayInitElem { .. } => {
330 anyhow::bail!("unsupported `array.init_elem` instruction")
331 }
332
333 _ => continue,
334 }
335 }
336 }
337 wasmparser::Payload::GlobalSection(globals) => {
338 for g in globals {
339 let g = g?.ty;
340 if !g.mutable {
341 continue;
342 }
343 match g.content_type {
344 wasmparser::ValType::I32
345 | wasmparser::ValType::I64
346 | wasmparser::ValType::F32
347 | wasmparser::ValType::F64
348 | wasmparser::ValType::V128 => {}
349 wasmparser::ValType::Ref(_) => {
350 anyhow::bail!(
351 "unsupported mutable global containing a reference type"
352 )
353 }
354 }
355 }
356 }
357 _ => {}
358 }
359 }
360
361 Ok(())
362 }
363
364 fn get_keep_init_func(&self) -> bool {
365 match self.keep_init_func {
366 Some(keep) => keep.unwrap_or(true),
367 None => DEFAULT_KEEP_INIT_FUNC,
368 }
369 }
370}
371
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
///
/// Both methods return `Send` futures, and [`Wizer::snapshot`] takes an
/// implementation of this trait by `&mut` reference.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    fn global_get(&mut self, name: &str) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name`, passing the
    /// entire contents as a byte slice to the `contents` closure.
    ///
    /// The bytes are handed to the closure rather than returned: the future
    /// returned by this method resolves to `()`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}