wasm_bindgen_wasm_interpreter/lib.rs
1//! A tiny and incomplete Wasm interpreter
2//!
3//! This module contains a tiny and incomplete Wasm interpreter built on top of
4//! `walrus`'s module structure. Each `Interpreter` contains some state
5//! about the execution of a Wasm instance. The "incomplete" part here is
6//! related to the fact that this is *only* used to execute the various
7//! descriptor functions for wasm-bindgen.
8//!
//! As a recap, the wasm-bindgen macro generates "descriptor functions" which
//! basically act as a mapping of rustc's trait resolution in executable code.
//! This allows us to detect, after the macro is invoked, what trait selection
//! did and what the types of functions look like. When executed, each
//! descriptor function invokes a known import (with only one argument) some
//! number of times, which gives us a list of `u32` values to then decode.
15//!
16//! The interpreter here is only geared towards this one exact use case, so it's
17//! quite small and likely not extra-efficient.
18
19#![deny(missing_docs)]
20
21use anyhow::{bail, ensure};
22use std::collections::{BTreeMap, BTreeSet, HashMap};
23use walrus::ir::Instr;
24use walrus::{ElementId, FunctionId, LocalId, Module, TableId};
25
26/// A ready-to-go interpreter of a Wasm module.
27///
28/// An interpreter currently represents effectively cached state. It is reused
29/// between calls to `interpret` and is precomputed from a `Module`. It houses
30/// state like the Wasm stack, Wasm memory, etc.
31#[derive(Default)]
32pub struct Interpreter {
33 // Function index of the `__wbindgen_describe` and
34 // `__wbindgen_describe_closure` imported functions. We special case this
35 // to know when the environment's imported function is called.
36 describe_id: Option<FunctionId>,
37 describe_closure_id: Option<FunctionId>,
38
39 // Id of the function table
40 functions: Option<TableId>,
41
42 // A mapping of string names to the function index, filled with all exported
43 // functions.
44 name_map: HashMap<String, FunctionId>,
45
46 // The current stack pointer (global 0) and Wasm memory (the stack). Only
47 // used in a limited capacity.
48 sp: i32,
49 mem: Vec<i32>,
50 scratch: Vec<i32>,
51
52 // The descriptor which we're assembling, a list of `u32` entries. This is
53 // very specific to wasm-bindgen and is the purpose for the existence of
54 // this module.
55 descriptor: Vec<u32>,
56
57 // When invoking the `__wbindgen_describe_closure` imported function, this
58 // stores the last table index argument, used for finding a different
59 // descriptor.
60 descriptor_table_idx: Option<u32>,
61}
62
63impl Interpreter {
64 /// Creates a new interpreter from a provided `Module`, precomputing all
65 /// information necessary to interpret further.
66 ///
67 /// Note that the `module` passed in to this function must be the same as
68 /// the `module` passed to `interpret` below.
69 pub fn new(module: &Module) -> Result<Interpreter, anyhow::Error> {
70 let mut ret = Interpreter::default();
71
72 // Give ourselves some memory and set the stack pointer
73 // (the LLVM call stack, now the Wasm stack, global 0) to the top.
74 ret.mem = vec![0; 0x8000];
75 ret.sp = ret.mem.len() as i32;
76
77 // Figure out where the `__wbindgen_describe` imported function is, if
78 // it exists. We'll special case calls to this function as our
79 // interpretation should only invoke this function as an imported
80 // function.
81 for import in module.imports.iter() {
82 let id = match import.kind {
83 walrus::ImportKind::Function(id) => id,
84 _ => continue,
85 };
86 if import.module != "__wbindgen_placeholder__" {
87 continue;
88 }
89 if import.name == "__wbindgen_describe" {
90 ret.describe_id = Some(id);
91 } else if import.name == "__wbindgen_describe_closure" {
92 ret.describe_closure_id = Some(id);
93 }
94 }
95
96 // Build up the mapping of exported functions to function ids.
97 for export in module.exports.iter() {
98 let id = match export.item {
99 walrus::ExportItem::Function(id) => id,
100 _ => continue,
101 };
102 ret.name_map.insert(export.name.to_string(), id);
103 }
104
105 ret.functions = module.tables.main_function_table()?;
106
107 Ok(ret)
108 }
109
110 /// Interprets the execution of the descriptor function `func`.
111 ///
112 /// This function will execute `func` in the `module` provided. Note that
113 /// the `module` provided here must be the same as the one passed to `new`
114 /// when this `Interpreter` was constructed.
115 ///
116 /// The `func` must be a wasm-bindgen descriptor function meaning that it
117 /// doesn't do anything like use floats or i64. Instead all it should do is
118 /// call other functions, sometimes some stack pointer manipulation, and
119 /// then call the one imported `__wbindgen_describe` function. Anything else
120 /// will cause this interpreter to panic.
121 ///
122 /// When the descriptor has finished running the assembled descriptor list
123 /// is returned. The descriptor returned can then be re-parsed into an
124 /// actual `Descriptor` in the cli-support crate.
125 ///
126 /// # Return value
127 ///
128 /// Returns `Some` if `func` was found in the `module` and `None` if it was
129 /// not found in the `module`.
130 pub fn interpret_descriptor(&mut self, id: FunctionId, module: &Module) -> Option<&[u32]> {
131 self.descriptor.truncate(0);
132
133 // We should have a blank Wasm and LLVM stack at both the start and end
134 // of the call.
135 assert_eq!(self.sp, self.mem.len() as i32);
136 self.call(id, module, &[]);
137 assert_eq!(self.sp, self.mem.len() as i32);
138 Some(&self.descriptor)
139 }
140
141 /// Interprets a "closure descriptor", figuring out the signature of the
142 /// closure that was intended.
143 ///
144 /// This function will take an `id` which is known to internally
145 /// execute `__wbindgen_describe_closure` and interpret it. The
146 /// `wasm-bindgen` crate controls all callers of this internal import. It
147 /// will then take the index passed to `__wbindgen_describe_closure` and
148 /// interpret it as a function pointer. This means it'll look up within the
149 /// element section (function table) which index it points to. Upon finding
150 /// the relevant entry it'll assume that function is a descriptor function,
151 /// and then it will execute the descriptor function.
152 ///
153 /// The returned value is the return value of the descriptor function found.
154 /// The `entry_removal_list` list is also then populated with an index of
155 /// the entry in the elements section (and then the index within that
156 /// section) of the function that needs to be snip'd out.
157 pub fn interpret_closure_descriptor(
158 &mut self,
159 id: FunctionId,
160 module: &Module,
161 entry_removal_list: &mut HashMap<ElementId, BTreeSet<usize>>,
162 ) -> Option<&[u32]> {
163 // Call the `id` function. This is an internal `#[inline(never)]`
164 // whose code is completely controlled by the `wasm-bindgen` crate, so
165 // it should take some arguments (the number of arguments depends on the
166 // optimization level) and return one (all of which we don't care about
167 // here). What we're interested in is that while executing this function
168 // it'll call `__wbindgen_describe_closure` with an argument that we
169 // look for.
170 assert!(self.descriptor_table_idx.is_none());
171
172 let func = module.funcs.get(id);
173 let params = module.types.get(func.ty()).params();
174 assert!(
175 params.iter().all(|p| *p == walrus::ValType::I32),
176 "closure descriptors should only have i32 params"
177 );
178 let num_params = params.len();
179 assert!(
180 num_params <= 2,
181 "closure descriptors have 2 parameters, but might lose some parameters due to LTO"
182 );
183
184 let args = vec![0; num_params];
185 self.call(id, module, &args);
186 let descriptor_table_idx = self
187 .descriptor_table_idx
188 .take()
189 .expect("descriptor function should return index");
190
191 // After we've got the table index of the descriptor function we're
192 // interested go take a look in the function table to find what the
193 // actual index of the function is.
194 let entry =
195 wasm_bindgen_wasm_conventions::get_function_table_entry(module, descriptor_table_idx)
196 .expect("failed to find entry in function table");
197 let descriptor_id = entry.func.expect("element segment slot wasn't set");
198 entry_removal_list
199 .entry(entry.element)
200 .or_default()
201 .insert(entry.idx);
202
203 // And now execute the descriptor!
204 self.interpret_descriptor(descriptor_id, module)
205 }
206
207 /// Returns the function id of the `__wbindgen_describe_closure`
208 /// imported function.
209 pub fn describe_closure_id(&self) -> Option<FunctionId> {
210 self.describe_closure_id
211 }
212
213 /// Returns the detected id of the function table.
214 pub fn function_table_id(&self) -> Option<TableId> {
215 self.functions
216 }
217
218 fn call(&mut self, id: FunctionId, module: &Module, args: &[i32]) -> Option<i32> {
219 let func = module.funcs.get(id);
220 log::debug!("starting a call of {:?} {:?}", id, func.name);
221 log::debug!("arguments {:?}", args);
222 let local = match &func.kind {
223 walrus::FunctionKind::Local(l) => l,
224 _ => panic!("can only call locally defined functions"),
225 };
226
227 let entry = local.entry_block();
228 let block = local.block(entry);
229
230 let mut frame = Frame {
231 module,
232 interp: self,
233 locals: BTreeMap::new(),
234 done: false,
235 };
236
237 assert_eq!(local.args.len(), args.len());
238 for (arg, val) in local.args.iter().zip(args) {
239 frame.locals.insert(*arg, *val);
240 }
241
242 for (instr, _) in block.instrs.iter() {
243 if let Err(err) = frame.eval(instr) {
244 if let Some(name) = &module.funcs.get(id).name {
245 panic!("{name}: {err}")
246 } else {
247 panic!("{err}")
248 }
249 }
250
251 if frame.done {
252 break;
253 }
254 }
255 self.scratch.last().cloned()
256 }
257}
258
259struct Frame<'a> {
260 module: &'a Module,
261 interp: &'a mut Interpreter,
262 locals: BTreeMap<LocalId, i32>,
263 done: bool,
264}
265
266impl Frame<'_> {
267 fn eval(&mut self, instr: &Instr) -> anyhow::Result<()> {
268 use walrus::ir::*;
269
270 let stack = &mut self.interp.scratch;
271
272 match instr {
273 Instr::Const(c) => match c.value {
274 Value::I32(n) => stack.push(n),
275 _ => bail!("non-i32 constant"),
276 },
277 Instr::LocalGet(e) => stack.push(self.locals.get(&e.local).cloned().unwrap_or(0)),
278 Instr::LocalSet(e) => {
279 let val = stack.pop().unwrap();
280 self.locals.insert(e.local, val);
281 }
282 Instr::LocalTee(e) => {
283 let val = *stack.last().unwrap();
284 self.locals.insert(e.local, val);
285 }
286
287 // Blindly assume all globals are the stack pointer
288 Instr::GlobalGet(_) => stack.push(self.interp.sp),
289 Instr::GlobalSet(_) => {
290 let val = stack.pop().unwrap();
291 self.interp.sp = val;
292 }
293
294 // Support simple arithmetic, mainly for the stack pointer
295 // manipulation
296 Instr::Binop(e) => {
297 let rhs = stack.pop().unwrap();
298 let lhs = stack.pop().unwrap();
299 stack.push(match e.op {
300 BinaryOp::I32Sub => lhs - rhs,
301 BinaryOp::I32Add => lhs + rhs,
302 op => bail!("invalid binary op {:?}", op),
303 });
304 }
305
306 // Support small loads/stores to the stack. These show up in debug
307 // mode where there's some traffic on the linear stack even when in
308 // theory there doesn't need to be.
309 Instr::Load(e) => {
310 let address = stack.pop().unwrap();
311 ensure!(
312 address > 0,
313 "Read a negative address value from the stack. Did we run out of memory?"
314 );
315 let address = address as u32 + e.arg.offset;
316 ensure!(address % 4 == 0);
317 stack.push(self.interp.mem[address as usize / 4])
318 }
319 Instr::Store(e) => {
320 let value = stack.pop().unwrap();
321 let address = stack.pop().unwrap();
322 ensure!(
323 address > 0,
324 "Read a negative address value from the stack. Did we run out of memory?"
325 );
326 let address = address as u32 + e.arg.offset;
327 ensure!(address % 4 == 0);
328 self.interp.mem[address as usize / 4] = value;
329 }
330
331 Instr::Return(_) => {
332 log::debug!("return");
333 self.done = true;
334 }
335
336 Instr::Drop(_) => {
337 log::debug!("drop");
338 stack.pop().unwrap();
339 }
340
341 Instr::Call(Call { func }) | Instr::ReturnCall(ReturnCall { func }) => {
342 let func = *func;
343 // If this function is calling the `__wbindgen_describe`
344 // function, which we've precomputed the id for, then
345 // it's telling us about the next `u32` element in the
346 // descriptor to return. We "call" the imported function
347 // here by directly inlining it.
348 if Some(func) == self.interp.describe_id {
349 let val = stack.pop().unwrap();
350 log::debug!("__wbindgen_describe({})", val);
351 self.interp.descriptor.push(val as u32);
352
353 // If this function is calling the `__wbindgen_describe_closure`
354 // function then it's similar to the above, except there's a
355 // slightly different signature. Note that we don't eval the
356 // previous arguments because they shouldn't have any side
357 // effects we're interested in.
358 } else if Some(func) == self.interp.describe_closure_id {
359 let val = stack.pop().unwrap();
360 stack.pop();
361 stack.pop();
362 log::debug!("__wbindgen_describe_closure({})", val);
363 self.interp.descriptor_table_idx = Some(val as u32);
364 stack.push(0)
365
366 // ... otherwise this is a normal call so we recurse.
367 } else {
368 // Skip profiling related functions which we don't want to interpret.
369 if self
370 .module
371 .funcs
372 .get(func)
373 .name
374 .as_ref()
375 .is_some_and(|name| {
376 name.starts_with("__llvm_profile_init")
377 || name.starts_with("__llvm_profile_register_function")
378 || name.starts_with("__llvm_profile_register_function")
379 })
380 {
381 return Ok(());
382 }
383
384 let ty = self.module.types.get(self.module.funcs.get(func).ty());
385 let args = (0..ty.params().len())
386 .map(|_| stack.pop().unwrap())
387 .collect::<Vec<_>>();
388
389 self.interp.call(func, self.module, &args);
390 }
391
392 if let Instr::ReturnCall(_) = instr {
393 log::debug!("return_call");
394 self.done = true;
395 }
396 }
397
398 // All other instructions shouldn't be used by our various
399 // descriptor functions. LLVM optimizations may mean that some
400 // of the above instructions aren't actually needed either, but
401 // the above instructions have empirically been required when
402 // executing our own test suite in wasm-bindgen.
403 //
404 // Note that LLVM may change over time to generate new
405 // instructions in debug mode, and we'll have to react to those
406 // sorts of changes as they arise.
407 s => bail!("unknown instruction {:?}", s),
408 }
409
410 Ok(())
411 }
412}