wasm-bindgen-cli-support 0.2.118

Shared support for the wasm-bindgen-cli package, an internal dependency
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
//! A tiny and incomplete Wasm interpreter
//!
//! This module contains a tiny and incomplete Wasm interpreter built on top of
//! `walrus`'s module structure. Each `Interpreter` contains some state
//! about the execution of a Wasm instance. The "incomplete" part here is
//! related to the fact that this is *only* used to execute the various
//! descriptor functions for wasm-bindgen.
//!
//! As a recap, the wasm-bindgen macro generates "descriptor functions" which
//! basically act as a mapping of rustc's trait resolution in executable code.
//! This allows us to detect, after the macro is invoked, what trait selection
//! did and what types of functions look like. By executing descriptor functions
//! they'll each invoke a known import (with only one argument) some number of
//! times, which gives us a list of `u32` values to then decode.
//!
//! The interpreter here is only geared towards this one exact use case, so it's
//! quite small and likely not extra-efficient.

#![deny(missing_docs)]

use crate::wasm_conventions;
use anyhow::{bail, ensure};
use std::collections::{BTreeMap, HashMap, HashSet};
use walrus::ir::InstrSeqId;
use walrus::{ExportId, FunctionId, GlobalId, GlobalKind, LocalFunction, LocalId, Module};

/// A ready-to-go interpreter of a Wasm module.
///
/// An interpreter currently represents effectively cached state. It is reused
/// between calls to `interpret_descriptor` and is precomputed from a `Module`.
/// It houses state like the Wasm stack, Wasm memory, etc.
#[derive(Default)]
pub struct Interpreter {
    // Function index of the `__wbindgen_describe` and
    // `__wbindgen_describe_cast` imported functions. We special case this
    // to know when the environment's imported function is called.
    describe_id: Option<FunctionId>,
    describe_cast_id: Option<FunctionId>,

    // Linear memory mirroring the module's own, used for stack loads/stores
    // during descriptor execution. It is stored as 32-bit words: the byte at
    // address `a` lives in `mem[a / 4]`.
    mem: Vec<i32>,
    // The Wasm operand stack. It is shared by every frame, so arguments are
    // popped from it when a call is made and values pushed by a callee stay
    // on it for the caller.
    scratch: Vec<i32>,

    // GlobalId of __stack_pointer, if found. Used to validate that the stack
    // pointer is restored after each descriptor execution.
    stack_pointer: Option<GlobalId>,

    // The stack pointer value at the start of each interpret_descriptor call,
    // used to validate restoration and to unwind early exits (describe_cast).
    stack_pointer_initial: i32,

    // Live state of all locally-defined i32 globals, snapshotted from the
    // module at construction and mutated freely during interpretation.
    globals: HashMap<GlobalId, i32>,

    // The descriptor which we're assembling, a list of `u32` entries. This is
    // very specific to wasm-bindgen and is the purpose for the existence of
    // this module.
    descriptor: Vec<u32>,

    /// The export id of `__wbindgen_skip_interpret_calls`, if the module
    /// exports it.
    skip_interpret: Option<ExportId>,

    /// Some functions that need to skip interpret, such as `__wasm_call_ctors`
    /// and `__wasm_call_dtors`. These are the functions called from the entry
    /// block of `__wbindgen_skip_interpret_calls`.
    skip_calls: HashSet<FunctionId>,
    // Set once `__wbindgen_describe_cast` has been reached; tells enclosing
    // blocks to stop evaluating. Reset at the start of every
    // `interpret_descriptor` call.
    stopped: bool,
}

/// Collects the ids of every function that is called (via `call` or
/// `return_call`) directly from the entry block of the function `id`.
///
/// This is run on `__wbindgen_skip_interpret_calls`, whose body is expected to
/// contain at most three calls for now:
///
///   1. `__wasm_call_ctors` (`#[link_section = ".init_array"]`)
///   2. `__wbindgen_skip_interpret_calls` (the original symbol, we don't care
///      about it)
///   3. `__wasm_call_dtors` (may not be present in a Rust program, but may be
///      present if a C program is linked)
///
/// # Panics
///
/// Panics if `id` does not refer to a locally defined function.
fn skip_calls(module: &Module, id: FunctionId) -> HashSet<FunctionId> {
    use walrus::ir::{Call, Instr, ReturnCall};

    let walrus::FunctionKind::Local(local) = &module.funcs.get(id).kind else {
        panic!("can only call locally defined functions")
    };

    let mut callees = HashSet::new();
    for (instr, _) in &local.block(local.entry_block()).instrs {
        // Anything that is not a call is ignored. Typically there are no other
        // instructions, or only a return instruction; with coverage turned on
        // there may also be llvm coverage instrumentation instructions.
        if let Instr::Call(Call { func }) | Instr::ReturnCall(ReturnCall { func }) = instr {
            callees.insert(*func);
        }
    }
    callees
}

impl Interpreter {
    /// Creates a new interpreter from a provided `Module`, precomputing all
    /// information necessary to interpret further.
    ///
    /// Note that the `module` passed in to this function must be the same as
    /// the `module` passed to `interpret` below.
    pub fn new(module: &Module) -> Result<Interpreter, anyhow::Error> {
        let mut ret = Interpreter {
            // Mirror the module's own linear memory so the stack pointer's
            // snapshotted value is directly valid as an index. If there is no
            // memory, descriptor functions can't do any loads/stores, so an
            // empty vec is fine (any attempted access will panic).
            mem: module
                .memories
                .iter()
                .next()
                .map_or(vec![], |m| vec![0; m.initial as usize * 65536 / 4]),
            ..Default::default()
        };

        // Snapshot all locally-defined i32 globals so the interpreter can
        // read/write them during descriptor execution.
        for global in module.globals.iter() {
            if let GlobalKind::Local(walrus::ConstExpr::Value(walrus::ir::Value::I32(n))) =
                global.kind
            {
                ret.globals.insert(global.id(), n);
            }
        }

        if let Some(sp) = wasm_conventions::get_stack_pointer(module) {
            ret.stack_pointer = Some(sp);
        }

        // Figure out where the `__wbindgen_describe` imported function is, if
        // it exists. We'll special case calls to this function as our
        // interpretation should only invoke this function as an imported
        // function.
        for import in module.imports.iter() {
            let id = match import.kind {
                walrus::ImportKind::Function(id) => id,
                _ => continue,
            };
            if import.module != "__wbindgen_placeholder__" {
                continue;
            }
            if import.name == "__wbindgen_describe" {
                ret.describe_id = Some(id);
            } else if import.name == "__wbindgen_describe_cast" {
                ret.describe_cast_id = Some(id);
            }
        }

        // Setup skip_interpret id and skip_calls
        if let Some(export) = module
            .exports
            .iter()
            .find(|export| export.name == "__wbindgen_skip_interpret_calls")
        {
            let id = match export.item {
                walrus::ExportItem::Function(id) => id,
                _ => panic!("__wbindgen_skip_interpret_calls must be an export function"),
            };
            ret.skip_interpret = Some(export.id());
            ret.skip_calls = skip_calls(module, id);
        }

        Ok(ret)
    }

    /// Interprets the execution of the descriptor function `func`.
    ///
    /// This function will execute `func` in the `module` provided. Note that
    /// the `module` provided here must be the same as the one passed to `new`
    /// when this `Interpreter` was constructed.
    ///
    /// The `func` must be a wasm-bindgen descriptor function meaning that it
    /// doesn't do anything like use floats or i64. Instead all it should do is
    /// call other functions, sometimes some stack pointer manipulation, and
    /// then call the one imported `__wbindgen_describe` function. Anything else
    /// will cause this interpreter to panic.
    ///
    /// When the descriptor has finished running the assembled descriptor list
    /// is returned. The descriptor returned can then be re-parsed into an
    /// actual `Descriptor` in the cli-support crate.
    ///
    /// # Return value
    ///
    /// Returns `Some` if `func` was found in the `module` and `None` if it was
    /// not found in the `module`.
    pub fn interpret_descriptor(&mut self, id: FunctionId, module: &Module) -> &[u32] {
        self.descriptor.truncate(0);
        self.stopped = false;

        if let Some(sp) = self.stack_pointer {
            self.stack_pointer_initial = self.globals[&sp];
        }

        let func = module.funcs.get(id);
        let ty = module.types.get(func.ty());

        self.call(id, module, &vec![0; ty.params().len()]);

        // Validate the stack pointer was restored to its value at function entry.
        if let Some(sp) = self.stack_pointer {
            assert_eq!(self.globals[&sp], self.stack_pointer_initial);
        }
        &self.descriptor
    }

    /// Returns the function id of the `__wbindgen_describe_cast`
    /// imported function.
    pub fn describe_cast_id(&self) -> Option<FunctionId> {
        self.describe_cast_id
    }

    /// Returns the export id of the `__wbindgen_skip_interpret_calls`.
    pub fn skip_interpret(&self) -> Option<ExportId> {
        self.skip_interpret
    }

    fn call(&mut self, id: FunctionId, module: &Module, args: &[i32]) {
        let func = module.funcs.get(id);
        log::trace!("starting a call of {id:?} {:?}", func.name);
        log::trace!("arguments {args:?}");
        let local = match &func.kind {
            walrus::FunctionKind::Local(l) => l,
            _ => panic!("can only call locally defined functions"),
        };

        let mut frame = Frame {
            module,
            func: local,
            interp: self,
            locals: BTreeMap::new(),
        };

        assert_eq!(local.args.len(), args.len());
        for (arg, val) in local.args.iter().zip(args) {
            frame.locals.insert(*arg, *val);
        }

        frame.eval(local.entry_block()).unwrap_or_else(|err| {
            if let Some(name) = &module.funcs.get(id).name {
                panic!("{name}: {err}")
            } else {
                panic!("{err}")
            }
        })
    }
}

// The state of a single in-progress function call.
struct Frame<'a> {
    // The module being interpreted; used to look up callee functions and
    // their types.
    module: &'a Module,
    // The local function whose instruction sequences this frame evaluates.
    func: &'a LocalFunction,
    // The interpreter that owns the shared state (operand stack, memory,
    // globals, descriptor) this frame reads and writes.
    interp: &'a mut Interpreter,
    // Current values of this call's locals. A local that was never written
    // reads as 0.
    locals: BTreeMap<LocalId, i32>,
}

impl Frame<'_> {
    fn eval(&mut self, seq: InstrSeqId) -> anyhow::Result<()> {
        use walrus::ir::*;

        for (instr, _) in self.func.block(seq).iter() {
            let stack = &mut self.interp.scratch;

            match instr {
                Instr::Const(c) => match c.value {
                    Value::I32(n) => stack.push(n),
                    _ => bail!("non-i32 constant"),
                },
                Instr::LocalGet(e) => stack.push(self.locals.get(&e.local).cloned().unwrap_or(0)),
                Instr::LocalSet(e) => {
                    let val = stack.pop().unwrap();
                    self.locals.insert(e.local, val);
                }
                Instr::LocalTee(e) => {
                    let val = *stack.last().unwrap();
                    self.locals.insert(e.local, val);
                }

                Instr::GlobalGet(e) => {
                    let val = *self.interp.globals.get(&e.global).unwrap_or_else(|| {
                        panic!(
                            "global {:?} not found, this is a bug in wasm-bindgen",
                            e.global
                        )
                    });
                    stack.push(val);
                }
                Instr::GlobalSet(e) => {
                    let val = stack.pop().unwrap();
                    self.interp.globals.insert(e.global, val);
                }

                // Support simple arithmetic, mainly for the stack pointer
                // manipulation
                Instr::Binop(e) => {
                    let rhs = stack.pop().unwrap();
                    let lhs = stack.pop().unwrap();
                    stack.push(match e.op {
                        BinaryOp::I32Sub => lhs - rhs,
                        BinaryOp::I32Add => lhs + rhs,
                        op => bail!("invalid binary op {op:?}"),
                    });
                }

                // Support small loads/stores to the stack. These show up in debug
                // mode where there's some traffic on the linear stack even when in
                // theory there doesn't need to be.
                Instr::Load(e) => {
                    let address = stack.pop().unwrap();
                    let address = address as u32 + e.arg.offset as u32;
                    ensure!(
                        address > 0,
                        "Read a negative or zero address value from the stack. Did we run out of memory?"
                    );
                    let width = e.kind.width();
                    ensure!(address % width == 0);
                    let val = self.interp.mem[address as usize / 4];
                    if width == 4 {
                        stack.push(val)
                    } else if width == 1 {
                        let result = val.to_le_bytes()[(address % 4) as usize];
                        let LoadKind::I32_8 { kind } = e.kind else {
                            panic!("Unhandled load kind {:?}", e.kind)
                        };
                        match kind {
                            ExtendedLoad::SignExtend => {
                                stack.push(result as i8 as i32);
                            }
                            ExtendedLoad::ZeroExtend | ExtendedLoad::ZeroExtendAtomic => {
                                stack.push(result as i32);
                            }
                        };
                    } else {
                        panic!("Unhandled load width {width}");
                    }
                }
                Instr::Store(e) => {
                    let value = stack.pop().unwrap();
                    let address = stack.pop().unwrap();
                    let address = address as u32 + e.arg.offset as u32;
                    ensure!(
                        address > 0,
                        "Read a negative or zero address value from the stack. Did we run out of memory?"
                    );
                    let width = e.kind.width();
                    ensure!(address % width == 0);
                    let index = address as usize / 4;
                    if width == 8 {
                        // Oops our stack is of i32s so we can't really handle a
                        // store of width 8. Just treat the more signifcant 4
                        // bytes as 0.
                        self.interp.mem[index] = value;
                        self.interp.mem[index + 1] = 0;
                    } else if width == 4 {
                        self.interp.mem[index] = value;
                    } else if width == 1 {
                        let mut bytes = self.interp.mem[index].to_le_bytes();
                        bytes[(address % 4) as usize] = value as u8;
                        self.interp.mem[index] = i32::from_le_bytes(bytes);
                    } else {
                        panic!("Unhandled store width {width}");
                    }
                }

                Instr::Return(_) => {
                    log::trace!("return");
                    break;
                }

                Instr::Drop(_) => {
                    log::trace!("drop");
                    stack.pop().unwrap();
                }

                Instr::Call(Call { func }) | Instr::ReturnCall(ReturnCall { func }) => {
                    let func = *func;
                    // If this function is calling the `__wbindgen_describe`
                    // function, which we've precomputed the id for, then
                    // it's telling us about the next `u32` element in the
                    // descriptor to return. We "call" the imported function
                    // here by directly inlining it.
                    if Some(func) == self.interp.describe_id {
                        let val = stack.pop().unwrap();
                        log::trace!("__wbindgen_describe({val})");
                        self.interp.descriptor.push(val as u32);

                    // If this function is calling the `__wbindgen_describe_cast`
                    // function then it's just a marker for the parent function
                    // to be treated as a cast.
                    } else if Some(func) == self.interp.describe_cast_id {
                        log::trace!("__wbindgen_describe_cast()");
                        // `__wbindgen_describe_cast` marks the end of the cast
                        // descriptor. Stop here, ignoring anything on the stack.
                        // Restore SP to its entry value since the normal function
                        // epilogue won't run.
                        if let Some(sp) = self.interp.stack_pointer {
                            self.interp
                                .globals
                                .insert(sp, self.interp.stack_pointer_initial);
                        }
                        self.interp.stopped = true;
                        break;

                    // ... otherwise this is a normal call so we recurse.
                    } else {
                        // Skip the constructor function.
                        //
                        // Complex logic can be implemented in the ctor, our simple interpreter will fail
                        // to execute due to missing instructions.
                        //
                        // For example, executing `1 + 1` fails due to the lack of `I32.And` instruction.
                        //
                        // Because `wasm-ld` may insert a call to ctor from the beginning of every function that
                        // your module exports, the interpreter will enter the ctor logic when parsing the
                        // `wasm-bindgen` function, causing failure.
                        if self.interp.skip_calls.contains(&func) {
                            continue;
                        }

                        // Skip profiling related functions which we don't want to interpret.
                        if self
                            .module
                            .funcs
                            .get(func)
                            .name
                            .as_ref()
                            .is_some_and(|name| {
                                name.starts_with("__llvm_profile_init")
                                    || name.starts_with("__llvm_profile_register_function")
                                    || name.starts_with("__llvm_profile_register_function")
                            })
                        {
                            continue;
                        }

                        let ty = self.module.types.get(self.module.funcs.get(func).ty());
                        let mut args = (0..ty.params().len())
                            .map(|_| stack.pop().unwrap())
                            .collect::<Vec<_>>();
                        args.reverse();

                        self.interp.call(func, self.module, &args);
                    }

                    if let Instr::ReturnCall(_) = instr {
                        log::trace!("return_call");
                        break;
                    }
                }

                Instr::Block(block) => {
                    self.eval(block.seq)?;
                    if self.interp.stopped {
                        break;
                    }
                }

                Instr::Try(block) => {
                    self.eval(block.seq)?;
                    if self.interp.stopped {
                        break;
                    }
                }

                Instr::TryTable(block) => {
                    self.eval(block.seq)?;
                    if self.interp.stopped {
                        break;
                    }
                }

                // All other instructions shouldn't be used by our various
                // descriptor functions. LLVM optimizations may mean that some
                // of the above instructions aren't actually needed either, but
                // the above instructions have empirically been required when
                // executing our own test suite in wasm-bindgen.
                //
                // Note that LLVM may change over time to generate new
                // instructions in debug mode, and we'll have to react to those
                // sorts of changes as they arise.
                s => bail!("unknown instruction {s:?}"),
            }
        }

        Ok(())
    }
}

#[cfg(test)]
mod smoke_tests;