wasm_bindgen_wasm_interpreter/
lib.rs

1//! A tiny and incomplete Wasm interpreter
2//!
3//! This module contains a tiny and incomplete Wasm interpreter built on top of
4//! `walrus`'s module structure. Each `Interpreter` contains some state
5//! about the execution of a Wasm instance. The "incomplete" part here is
6//! related to the fact that this is *only* used to execute the various
7//! descriptor functions for wasm-bindgen.
8//!
//! As a recap, the wasm-bindgen macro generates "descriptor functions" which
//! basically act as a mapping of rustc's trait resolution in executable code.
//! This allows us to detect, after the macro is invoked, what trait selection
//! did and what the types of functions look like. By executing descriptor
//! functions they'll each invoke a known import (with only one argument) some
//! number of times, which gives us a list of `u32` values to then decode.
15//!
16//! The interpreter here is only geared towards this one exact use case, so it's
17//! quite small and likely not extra-efficient.
18
19#![deny(missing_docs)]
20
21use anyhow::{bail, ensure};
22use std::collections::{BTreeMap, BTreeSet, HashMap};
23use walrus::ir::Instr;
24use walrus::{ElementId, FunctionId, LocalId, Module, TableId};
25
26/// A ready-to-go interpreter of a Wasm module.
27///
28/// An interpreter currently represents effectively cached state. It is reused
29/// between calls to `interpret` and is precomputed from a `Module`. It houses
30/// state like the Wasm stack, Wasm memory, etc.
31#[derive(Default)]
32pub struct Interpreter {
33    // Function index of the `__wbindgen_describe` and
34    // `__wbindgen_describe_closure` imported functions. We special case this
35    // to know when the environment's imported function is called.
36    describe_id: Option<FunctionId>,
37    describe_closure_id: Option<FunctionId>,
38
39    // Id of the function table
40    functions: Option<TableId>,
41
42    // A mapping of string names to the function index, filled with all exported
43    // functions.
44    name_map: HashMap<String, FunctionId>,
45
46    // The current stack pointer (global 0) and Wasm memory (the stack). Only
47    // used in a limited capacity.
48    sp: i32,
49    mem: Vec<i32>,
50    scratch: Vec<i32>,
51
52    // The descriptor which we're assembling, a list of `u32` entries. This is
53    // very specific to wasm-bindgen and is the purpose for the existence of
54    // this module.
55    descriptor: Vec<u32>,
56
57    // When invoking the `__wbindgen_describe_closure` imported function, this
58    // stores the last table index argument, used for finding a different
59    // descriptor.
60    descriptor_table_idx: Option<u32>,
61}
62
63impl Interpreter {
64    /// Creates a new interpreter from a provided `Module`, precomputing all
65    /// information necessary to interpret further.
66    ///
67    /// Note that the `module` passed in to this function must be the same as
68    /// the `module` passed to `interpret` below.
69    pub fn new(module: &Module) -> Result<Interpreter, anyhow::Error> {
70        let mut ret = Interpreter::default();
71
72        // Give ourselves some memory and set the stack pointer
73        // (the LLVM call stack, now the Wasm stack, global 0) to the top.
74        ret.mem = vec![0; 0x8000];
75        ret.sp = ret.mem.len() as i32;
76
77        // Figure out where the `__wbindgen_describe` imported function is, if
78        // it exists. We'll special case calls to this function as our
79        // interpretation should only invoke this function as an imported
80        // function.
81        for import in module.imports.iter() {
82            let id = match import.kind {
83                walrus::ImportKind::Function(id) => id,
84                _ => continue,
85            };
86            if import.module != "__wbindgen_placeholder__" {
87                continue;
88            }
89            if import.name == "__wbindgen_describe" {
90                ret.describe_id = Some(id);
91            } else if import.name == "__wbindgen_describe_closure" {
92                ret.describe_closure_id = Some(id);
93            }
94        }
95
96        // Build up the mapping of exported functions to function ids.
97        for export in module.exports.iter() {
98            let id = match export.item {
99                walrus::ExportItem::Function(id) => id,
100                _ => continue,
101            };
102            ret.name_map.insert(export.name.to_string(), id);
103        }
104
105        ret.functions = module.tables.main_function_table()?;
106
107        Ok(ret)
108    }
109
110    /// Interprets the execution of the descriptor function `func`.
111    ///
112    /// This function will execute `func` in the `module` provided. Note that
113    /// the `module` provided here must be the same as the one passed to `new`
114    /// when this `Interpreter` was constructed.
115    ///
116    /// The `func` must be a wasm-bindgen descriptor function meaning that it
117    /// doesn't do anything like use floats or i64. Instead all it should do is
118    /// call other functions, sometimes some stack pointer manipulation, and
119    /// then call the one imported `__wbindgen_describe` function. Anything else
120    /// will cause this interpreter to panic.
121    ///
122    /// When the descriptor has finished running the assembled descriptor list
123    /// is returned. The descriptor returned can then be re-parsed into an
124    /// actual `Descriptor` in the cli-support crate.
125    ///
126    /// # Return value
127    ///
128    /// Returns `Some` if `func` was found in the `module` and `None` if it was
129    /// not found in the `module`.
130    pub fn interpret_descriptor(&mut self, id: FunctionId, module: &Module) -> Option<&[u32]> {
131        self.descriptor.truncate(0);
132
133        // We should have a blank Wasm and LLVM stack at both the start and end
134        // of the call.
135        assert_eq!(self.sp, self.mem.len() as i32);
136        self.call(id, module, &[]);
137        assert_eq!(self.sp, self.mem.len() as i32);
138        Some(&self.descriptor)
139    }
140
141    /// Interprets a "closure descriptor", figuring out the signature of the
142    /// closure that was intended.
143    ///
144    /// This function will take an `id` which is known to internally
145    /// execute `__wbindgen_describe_closure` and interpret it. The
146    /// `wasm-bindgen` crate controls all callers of this internal import. It
147    /// will then take the index passed to `__wbindgen_describe_closure` and
148    /// interpret it as a function pointer. This means it'll look up within the
149    /// element section (function table) which index it points to. Upon finding
150    /// the relevant entry it'll assume that function is a descriptor function,
151    /// and then it will execute the descriptor function.
152    ///
153    /// The returned value is the return value of the descriptor function found.
154    /// The `entry_removal_list` list is also then populated with an index of
155    /// the entry in the elements section (and then the index within that
156    /// section) of the function that needs to be snip'd out.
157    pub fn interpret_closure_descriptor(
158        &mut self,
159        id: FunctionId,
160        module: &Module,
161        entry_removal_list: &mut HashMap<ElementId, BTreeSet<usize>>,
162    ) -> Option<&[u32]> {
163        // Call the `id` function. This is an internal `#[inline(never)]`
164        // whose code is completely controlled by the `wasm-bindgen` crate, so
165        // it should take some arguments (the number of arguments depends on the
166        // optimization level) and return one (all of which we don't care about
167        // here). What we're interested in is that while executing this function
168        // it'll call `__wbindgen_describe_closure` with an argument that we
169        // look for.
170        assert!(self.descriptor_table_idx.is_none());
171
172        let func = module.funcs.get(id);
173        let params = module.types.get(func.ty()).params();
174        assert!(
175            params.iter().all(|p| *p == walrus::ValType::I32),
176            "closure descriptors should only have i32 params"
177        );
178        let num_params = params.len();
179        assert!(
180            num_params <= 2,
181            "closure descriptors have 2 parameters, but might lose some parameters due to LTO"
182        );
183
184        let args = vec![0; num_params];
185        self.call(id, module, &args);
186        let descriptor_table_idx = self
187            .descriptor_table_idx
188            .take()
189            .expect("descriptor function should return index");
190
191        // After we've got the table index of the descriptor function we're
192        // interested go take a look in the function table to find what the
193        // actual index of the function is.
194        let entry =
195            wasm_bindgen_wasm_conventions::get_function_table_entry(module, descriptor_table_idx)
196                .expect("failed to find entry in function table");
197        let descriptor_id = entry.func.expect("element segment slot wasn't set");
198        entry_removal_list
199            .entry(entry.element)
200            .or_default()
201            .insert(entry.idx);
202
203        // And now execute the descriptor!
204        self.interpret_descriptor(descriptor_id, module)
205    }
206
207    /// Returns the function id of the `__wbindgen_describe_closure`
208    /// imported function.
209    pub fn describe_closure_id(&self) -> Option<FunctionId> {
210        self.describe_closure_id
211    }
212
213    /// Returns the detected id of the function table.
214    pub fn function_table_id(&self) -> Option<TableId> {
215        self.functions
216    }
217
218    fn call(&mut self, id: FunctionId, module: &Module, args: &[i32]) -> Option<i32> {
219        let func = module.funcs.get(id);
220        log::debug!("starting a call of {:?} {:?}", id, func.name);
221        log::debug!("arguments {:?}", args);
222        let local = match &func.kind {
223            walrus::FunctionKind::Local(l) => l,
224            _ => panic!("can only call locally defined functions"),
225        };
226
227        let entry = local.entry_block();
228        let block = local.block(entry);
229
230        let mut frame = Frame {
231            module,
232            interp: self,
233            locals: BTreeMap::new(),
234            done: false,
235        };
236
237        assert_eq!(local.args.len(), args.len());
238        for (arg, val) in local.args.iter().zip(args) {
239            frame.locals.insert(*arg, *val);
240        }
241
242        for (instr, _) in block.instrs.iter() {
243            if let Err(err) = frame.eval(instr) {
244                if let Some(name) = &module.funcs.get(id).name {
245                    panic!("{name}: {err}")
246                } else {
247                    panic!("{err}")
248                }
249            }
250
251            if frame.done {
252                break;
253            }
254        }
255        self.scratch.last().cloned()
256    }
257}
258
259struct Frame<'a> {
260    module: &'a Module,
261    interp: &'a mut Interpreter,
262    locals: BTreeMap<LocalId, i32>,
263    done: bool,
264}
265
266impl Frame<'_> {
267    fn eval(&mut self, instr: &Instr) -> anyhow::Result<()> {
268        use walrus::ir::*;
269
270        let stack = &mut self.interp.scratch;
271
272        match instr {
273            Instr::Const(c) => match c.value {
274                Value::I32(n) => stack.push(n),
275                _ => bail!("non-i32 constant"),
276            },
277            Instr::LocalGet(e) => stack.push(self.locals.get(&e.local).cloned().unwrap_or(0)),
278            Instr::LocalSet(e) => {
279                let val = stack.pop().unwrap();
280                self.locals.insert(e.local, val);
281            }
282            Instr::LocalTee(e) => {
283                let val = *stack.last().unwrap();
284                self.locals.insert(e.local, val);
285            }
286
287            // Blindly assume all globals are the stack pointer
288            Instr::GlobalGet(_) => stack.push(self.interp.sp),
289            Instr::GlobalSet(_) => {
290                let val = stack.pop().unwrap();
291                self.interp.sp = val;
292            }
293
294            // Support simple arithmetic, mainly for the stack pointer
295            // manipulation
296            Instr::Binop(e) => {
297                let rhs = stack.pop().unwrap();
298                let lhs = stack.pop().unwrap();
299                stack.push(match e.op {
300                    BinaryOp::I32Sub => lhs - rhs,
301                    BinaryOp::I32Add => lhs + rhs,
302                    op => bail!("invalid binary op {:?}", op),
303                });
304            }
305
306            // Support small loads/stores to the stack. These show up in debug
307            // mode where there's some traffic on the linear stack even when in
308            // theory there doesn't need to be.
309            Instr::Load(e) => {
310                let address = stack.pop().unwrap();
311                ensure!(
312                    address > 0,
313                    "Read a negative address value from the stack. Did we run out of memory?"
314                );
315                let address = address as u32 + e.arg.offset;
316                ensure!(address % 4 == 0);
317                stack.push(self.interp.mem[address as usize / 4])
318            }
319            Instr::Store(e) => {
320                let value = stack.pop().unwrap();
321                let address = stack.pop().unwrap();
322                ensure!(
323                    address > 0,
324                    "Read a negative address value from the stack. Did we run out of memory?"
325                );
326                let address = address as u32 + e.arg.offset;
327                ensure!(address % 4 == 0);
328                self.interp.mem[address as usize / 4] = value;
329            }
330
331            Instr::Return(_) => {
332                log::debug!("return");
333                self.done = true;
334            }
335
336            Instr::Drop(_) => {
337                log::debug!("drop");
338                stack.pop().unwrap();
339            }
340
341            Instr::Call(Call { func }) | Instr::ReturnCall(ReturnCall { func }) => {
342                let func = *func;
343                // If this function is calling the `__wbindgen_describe`
344                // function, which we've precomputed the id for, then
345                // it's telling us about the next `u32` element in the
346                // descriptor to return. We "call" the imported function
347                // here by directly inlining it.
348                if Some(func) == self.interp.describe_id {
349                    let val = stack.pop().unwrap();
350                    log::debug!("__wbindgen_describe({})", val);
351                    self.interp.descriptor.push(val as u32);
352
353                // If this function is calling the `__wbindgen_describe_closure`
354                // function then it's similar to the above, except there's a
355                // slightly different signature. Note that we don't eval the
356                // previous arguments because they shouldn't have any side
357                // effects we're interested in.
358                } else if Some(func) == self.interp.describe_closure_id {
359                    let val = stack.pop().unwrap();
360                    stack.pop();
361                    stack.pop();
362                    log::debug!("__wbindgen_describe_closure({})", val);
363                    self.interp.descriptor_table_idx = Some(val as u32);
364                    stack.push(0)
365
366                // ... otherwise this is a normal call so we recurse.
367                } else {
368                    // Skip profiling related functions which we don't want to interpret.
369                    if self
370                        .module
371                        .funcs
372                        .get(func)
373                        .name
374                        .as_ref()
375                        .is_some_and(|name| {
376                            name.starts_with("__llvm_profile_init")
377                                || name.starts_with("__llvm_profile_register_function")
378                                || name.starts_with("__llvm_profile_register_function")
379                        })
380                    {
381                        return Ok(());
382                    }
383
384                    let ty = self.module.types.get(self.module.funcs.get(func).ty());
385                    let args = (0..ty.params().len())
386                        .map(|_| stack.pop().unwrap())
387                        .collect::<Vec<_>>();
388
389                    self.interp.call(func, self.module, &args);
390                }
391
392                if let Instr::ReturnCall(_) = instr {
393                    log::debug!("return_call");
394                    self.done = true;
395                }
396            }
397
398            // All other instructions shouldn't be used by our various
399            // descriptor functions. LLVM optimizations may mean that some
400            // of the above instructions aren't actually needed either, but
401            // the above instructions have empirically been required when
402            // executing our own test suite in wasm-bindgen.
403            //
404            // Note that LLVM may change over time to generate new
405            // instructions in debug mode, and we'll have to react to those
406            // sorts of changes as they arise.
407            s => bail!("unknown instruction {:?}", s),
408        }
409
410        Ok(())
411    }
412}