soroban_env_host_zephyr/
vm.rs

1//! This module primarily provides the [Vm] type and the necessary name-lookup
2//! and runtime-dispatch mechanisms needed to allow WASM modules to call into
3//! the [Env](crate::Env) interface implemented by [Host].
4//!
5//! It also contains helper methods to look up and call into contract functions
6//! in terms of [ScVal] and [Val] arguments.
7//!
8//! The implementation of WASM types and the WASM bytecode interpreter come from
9//! the [wasmi](https://github.com/paritytech/wasmi) project.
10
11mod dispatch;
12mod fuel_refillable;
13mod func_info;
14mod module_cache;
15mod parsed_module;
16
17#[cfg(feature = "bench")]
18pub(crate) use dispatch::dummy0;
19#[cfg(test)]
20pub(crate) use dispatch::protocol_gated_dummy;
21
22use crate::{
23    budget::{get_wasmi_config, AsBudget, Budget},
24    host::{
25        error::TryBorrowOrErr,
26        metered_clone::MeteredContainer,
27        metered_hash::{CountingHasher, MeteredHash},
28    },
29    xdr::{ContractCostType, Hash, ScErrorCode, ScErrorType},
30    ConversionError, Host, HostError, Symbol, SymbolStr, TryIntoVal, Val, WasmiMarshal,
31};
32use std::{cell::RefCell, collections::BTreeSet, rc::Rc};
33
34use fuel_refillable::FuelRefillable;
35use func_info::HOST_FUNCTIONS;
36
37pub use module_cache::ModuleCache;
38pub use parsed_module::{ParsedModule, VersionedContractCodeCostInputs};
39
40use wasmi::{Instance, Linker, Memory, Store, Value};
41
42use crate::VmCaller;
43use wasmi::{Caller, StoreContextMut};
44
45impl wasmi::core::HostError for HostError {}
46
47const MAX_VM_ARGS: usize = 32;
48const WASM_STD_MEM_PAGE_SIZE_IN_BYTES: u32 = 0x10000;
49
50struct VmInstantiationTimer {
51    #[cfg(not(target_family = "wasm"))]
52    host: Host,
53    #[cfg(not(target_family = "wasm"))]
54    start: std::time::Instant,
55}
56impl VmInstantiationTimer {
57    fn new(_host: Host) -> Self {
58        VmInstantiationTimer {
59            #[cfg(not(target_family = "wasm"))]
60            host: _host,
61            #[cfg(not(target_family = "wasm"))]
62            start: std::time::Instant::now(),
63        }
64    }
65}
66#[cfg(not(target_family = "wasm"))]
67impl Drop for VmInstantiationTimer {
68    fn drop(&mut self) {
69        let _ = self.host.as_budget().track_time(
70            ContractCostType::VmInstantiation,
71            self.start.elapsed().as_nanos() as u64,
72        );
73    }
74}
75
/// A [Vm] is a thin wrapper around an instance of [wasmi::Module]. Multiple
/// [Vm]s may be held in a single [Host], and each contains a single WASM module
/// instantiation.
///
/// [Vm] rejects modules with either floating point or start functions.
///
/// [Vm] is configured to use its [Host] as a source of WASM imports: the
/// module is instantiated through a `Linker<Host>` that is populated from the
/// host function table (see `Host::make_linker`), so only host functions
/// declared in [Env](crate::Env) can be resolved as imports.
pub struct Vm {
    /// Identifier of the contract this VM was instantiated for. It is also
    /// the only field that participates in the `Hash` impl of [Vm].
    pub(crate) contract_id: Hash,
    /// The parsed module this VM was instantiated from.
    #[allow(dead_code)]
    pub(crate) module: Rc<ParsedModule>,
    /// The wasmi store holding this instance's state, with the [Host] as its
    /// data. Wrapped in a `RefCell` because calls need mutable access to it
    /// through a shared `&Vm`.
    store: RefCell<Store<Host>>,
    /// The instantiated module.
    instance: Instance,
    /// The linear memory exported under the name `memory`, looked up once at
    /// instantiation time; `None` if the module has no such memory export.
    pub(crate) memory: Option<Memory>,
}
95
96impl std::hash::Hash for Vm {
97    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
98        self.contract_id.hash(state);
99    }
100}
101
102impl Host {
103    pub(crate) fn make_linker(
104        engine: &wasmi::Engine,
105        symbols: &BTreeSet<(&str, &str)>,
106    ) -> Result<Linker<Host>, HostError> {
107        let mut linker = Linker::new(&engine);
108        for hf in HOST_FUNCTIONS {
109            if symbols.contains(&(hf.mod_str, hf.fn_str)) {
110                (hf.wrap)(&mut linker).map_err(|le| wasmi::Error::Linker(le))?;
111            }
112        }
113        Ok(linker)
114    }
115}
116
/// Selects how the cost of parsing a module is charged.
///
/// In one very narrow context -- when recording, and with a module cache -- we
/// defer the cost of parsing a module until we pop a control frame.
/// Unfortunately we have to thread this information from the call site down
/// to the parser. See the documentation on the recording-mode
/// `Vm::parse_module`, where this type is consumed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ModuleParseCostMode {
    /// Parse the module and charge for it as usual.
    Normal,
    /// Run the parse in the budget's observable shadow mode if the ledger
    /// protocol is at least `ModuleCache::MIN_LEDGER_VERSION` and the host is
    /// in storage recording mode; otherwise behave like [Self::Normal].
    #[cfg(any(test, feature = "recording_mode"))]
    PossiblyDeferredIfRecording,
}
127
128impl Vm {
129    #[cfg(feature = "testutils")]
130    pub fn get_all_host_functions() -> Vec<(&'static str, &'static str, u32)> {
131        HOST_FUNCTIONS
132            .iter()
133            .map(|hf| (hf.mod_str, hf.fn_str, hf.arity))
134            .collect()
135    }
136
137    /// Instantiates a VM given the arguments provided in [`Self::new`],
138    /// or [`Self::new_from_module_cache`]
139    fn instantiate(
140        host: &Host,
141        contract_id: Hash,
142        parsed_module: Rc<ParsedModule>,
143        linker: &Linker<Host>,
144    ) -> Result<Rc<Self>, HostError> {
145        let _span = tracy_span!("Vm::instantiate");
146
147        let engine = parsed_module.module.engine();
148        let mut store = Store::new(engine, host.clone());
149
150        parsed_module.cost_inputs.charge_for_instantiation(host)?;
151
152        store.limiter(|host| host);
153
154        {
155            // We perform instantiation-time protocol version gating of
156            // all module-imported symbols here.
157            // Reasons for doing link-time instead of run-time check:
158            // 1. VM instantiation is performed in both contract upload and
159            //    execution, thus any errorous contract will be rejected at
160            //    upload time.
161            // 2. If a contract contains a call to an outdated host function,
162            //    i.e. `contract_protocol > hf.max_supported_protocol`, failing
163            //    early is preferred from resource usage perspective.
164            // 3. If a contract contains a call to an non-existent host
165            //    function, the current (correct) behavior is to return
166            //    `Wasmi::errors::LinkerError::MissingDefinition` error (which gets
167            //    converted to a `(WasmVm, InvalidAction)`). If that host
168            //    function is defined in a later protocol, and we replay that
169            //    contract (in the earlier protocol where it belongs), we need
170            //    to return the same error.
171            let _span0 = tracy_span!("define host functions");
172            let ledger_proto = host.with_ledger_info(|li| Ok(li.protocol_version))?;
173            parsed_module.with_import_symbols(host, |module_symbols| {
174                for hf in HOST_FUNCTIONS {
175                    if !module_symbols.contains(&(hf.mod_str, hf.fn_str)) {
176                        continue;
177                    }
178                    if let Some(min_proto) = hf.min_proto {
179                        if parsed_module.proto_version < min_proto || ledger_proto < min_proto {
180                            return Err(host.err(
181                                ScErrorType::WasmVm,
182                                ScErrorCode::InvalidAction,
183                                "contract calls a host function not yet supported by current protocol",
184                                &[],
185                            ));
186                        }
187                    }
188                    if let Some(max_proto) = hf.max_proto {
189                        if parsed_module.proto_version > max_proto || ledger_proto > max_proto {
190                            return Err(host.err(
191                                ScErrorType::WasmVm,
192                                ScErrorCode::InvalidAction,
193                                "contract calls a host function no longer supported in the current protocol",
194                                &[],
195                            ));
196                        }
197                    }
198                }
199                Ok(())
200            })?;
201        }
202
203        let not_started_instance = {
204            let _span0 = tracy_span!("instantiate module");
205            host.map_err(linker.instantiate(&mut store, &parsed_module.module))?
206        };
207
208        let instance = host.map_err(
209            not_started_instance
210                .ensure_no_start(&mut store)
211                .map_err(|ie| wasmi::Error::Instantiation(ie)),
212        )?;
213
214        let memory = if let Some(ext) = instance.get_export(&mut store, "memory") {
215            ext.into_memory()
216        } else {
217            None
218        };
219
220        // Here we do _not_ supply the store with any fuel. Fuel is supplied
221        // right before the VM is being run, i.e., before crossing the host->VM
222        // boundary.
223        Ok(Rc::new(Self {
224            contract_id,
225            module: parsed_module,
226            store: RefCell::new(store),
227            instance,
228            memory,
229        }))
230    }
231
232    pub fn from_parsed_module(
233        host: &Host,
234        contract_id: Hash,
235        parsed_module: Rc<ParsedModule>,
236    ) -> Result<Rc<Self>, HostError> {
237        let _span = tracy_span!("Vm::from_parsed_module");
238        VmInstantiationTimer::new(host.clone());
239        if let Some(linker) = &*host.try_borrow_linker()? {
240            Self::instantiate(host, contract_id, parsed_module, linker)
241        } else {
242            let linker = parsed_module.make_linker(host)?;
243            Self::instantiate(host, contract_id, parsed_module, &linker)
244        }
245    }
246
247    /// Constructs a new instance of a [Vm] within the provided [Host],
248    /// establishing a new execution context for a contract identified by
249    /// `contract_id` with Wasm bytecode provided in `module_wasm_code`.
250    ///
251    /// This function performs several steps:
252    ///
253    ///   - Parses and performs Wasm validation on the module.
254    ///   - Checks that the module contains an [meta::INTERFACE_VERSION] that
255    ///     matches the host.
256    ///   - Checks that the module has no floating point code or `start`
257    ///     function, or post-MVP wasm extensions.
258    ///   - Instantiates the module, leaving it ready to accept function
259    ///     invocations.
260    ///   - Looks up and caches its linear memory export named `memory`
261    ///     if it exists.
262    ///
263    /// With the introduction of the granular cost inputs this method
264    /// should only be used for the one-off full parses of the new Wasms
265    /// during the initial upload verification.
266
267    pub fn new(host: &Host, contract_id: Hash, wasm: &[u8]) -> Result<Rc<Self>, HostError> {
268        let cost_inputs = VersionedContractCodeCostInputs::V0 {
269            wasm_bytes: wasm.len(),
270        };
271        Self::new_with_cost_inputs(
272            host,
273            contract_id,
274            wasm,
275            cost_inputs,
276            ModuleParseCostMode::Normal,
277        )
278    }
279
280    pub(crate) fn new_with_cost_inputs(
281        host: &Host,
282        contract_id: Hash,
283        wasm: &[u8],
284        cost_inputs: VersionedContractCodeCostInputs,
285        cost_mode: ModuleParseCostMode,
286    ) -> Result<Rc<Self>, HostError> {
287        let _span = tracy_span!("Vm::new");
288        VmInstantiationTimer::new(host.clone());
289        let parsed_module = Self::parse_module(host, wasm, cost_inputs, cost_mode)?;
290        let linker = parsed_module.make_linker(host)?;
291        Self::instantiate(host, contract_id, parsed_module, &linker)
292    }
293
294    #[cfg(not(any(test, feature = "recording_mode")))]
295    fn parse_module(
296        host: &Host,
297        wasm: &[u8],
298        cost_inputs: VersionedContractCodeCostInputs,
299        _cost_mode: ModuleParseCostMode,
300    ) -> Result<Rc<ParsedModule>, HostError> {
301        ParsedModule::new_with_isolated_engine(host, wasm, cost_inputs)
302    }
303
304    /// This method exists to support [crate::storage::FootprintMode::Recording]
305    /// when running in protocol versions that feature the [ModuleCache].
306    ///
307    /// There are two ways we can get to here:
308    ///
309    ///   1. When we're running in a protocol that doesn't support the
310    ///   [ModuleCache] at all. In this case, we just parse the module and
311    ///   charge for it as normal.
312    ///
313    ///   2. When we're in a protocol that _does_ support the [ModuleCache] but
314    ///   are _also_ in [crate::storage::FootprintMode::Recording] mode and
315    ///   _also_ being instantiated from [Host::call_contract_fn]. Then the
316    ///   [ModuleCache] _did not get built_ during host setup (because we had
317    ///   no footprint yet to buid the cache from), so our caller
318    ///   [Host::call_contract_fn] sees no module cache, and so each call winds
319    ///   up calling us here, reparsing each module as it's called, and then
320    ///   throwing it away.
321    ///
322    /// When we are in case 2, we don't want to charge for all those reparses:
323    /// we want to charge only for the post-parse instantiations _as if_ we had
324    /// had the cache. The cache will actually be added in [Host::pop_context]
325    /// _after_ a top-level recording-mode invocation completes, by reading the
326    /// storage and parsing all the modules in it, in order to charge for
327    /// parsing each used module _once_ and thereby produce a mostly-correct
328    /// total cost.
329    ///
330    /// We still charge the reparses to the shadow budget, to avoid a DoS risk,
331    /// and we still charge the instantiations to the real budget, to behave the
332    /// same as if we had a cache.
333    ///
334    /// Finally, for those scratching their head about the overall structure:
335    /// all of this happens as a result of the "module cache" not being
336    /// especially cache-like (i.e. not being populated lazily, on-access). It's
337    /// populated all at once, up front, because wasmi does not allow adding
338    /// modules to an engine that's currently running.
339    #[cfg(any(test, feature = "recording_mode"))]
340    fn parse_module(
341        host: &Host,
342        wasm: &[u8],
343        cost_inputs: VersionedContractCodeCostInputs,
344        cost_mode: ModuleParseCostMode,
345    ) -> Result<Rc<ParsedModule>, HostError> {
346        if cost_mode == ModuleParseCostMode::PossiblyDeferredIfRecording
347            && host.get_ledger_protocol_version()? >= ModuleCache::MIN_LEDGER_VERSION
348        {
349            if host.in_storage_recording_mode()? {
350                return host.budget_ref().with_observable_shadow_mode(|| {
351                    ParsedModule::new_with_isolated_engine(host, wasm, cost_inputs)
352                });
353            }
354        }
355        ParsedModule::new_with_isolated_engine(host, wasm, cost_inputs)
356    }
357
358    pub(crate) fn get_memory(&self, host: &Host) -> Result<Memory, HostError> {
359        match self.memory {
360            Some(mem) => Ok(mem),
361            None => Err(host.err(
362                ScErrorType::WasmVm,
363                ScErrorCode::MissingValue,
364                "no linear memory named `memory`",
365                &[],
366            )),
367        }
368    }
369
370    // Wrapper for the [`Func`] call which is metered as a component.
371    // Resolves the function entity, and takes care the conversion between and
372    // tranfering of the host budget / VM fuel. This is where the host->VM->host
373    // boundaries are crossed.
374    pub(crate) fn metered_func_call(
375        self: &Rc<Self>,
376        host: &Host,
377        func_sym: &Symbol,
378        inputs: &[Value],
379    ) -> Result<Val, HostError> {
380        host.charge_budget(ContractCostType::InvokeVmFunction, None)?;
381
382        // resolve the function entity to be called
383        let func_ss: SymbolStr = func_sym.try_into_val(host)?;
384        let ext = match self
385            .instance
386            .get_export(&*self.store.try_borrow_or_err()?, func_ss.as_ref())
387        {
388            None => {
389                return Err(host.err(
390                    ScErrorType::WasmVm,
391                    ScErrorCode::MissingValue,
392                    "invoking unknown export",
393                    &[func_sym.to_val()],
394                ))
395            }
396            Some(e) => e,
397        };
398        let func = match ext.into_func() {
399            None => {
400                return Err(host.err(
401                    ScErrorType::WasmVm,
402                    ScErrorCode::UnexpectedType,
403                    "export is not a function",
404                    &[func_sym.to_val()],
405                ))
406            }
407            Some(e) => e,
408        };
409
410        if inputs.len() > MAX_VM_ARGS {
411            return Err(host.err(
412                ScErrorType::WasmVm,
413                ScErrorCode::InvalidInput,
414                "Too many arguments in wasm invocation",
415                &[func_sym.to_val()],
416            ));
417        }
418
419        // call the function
420        let mut wasm_ret: [Value; 1] = [Value::I64(0)];
421        self.store.try_borrow_mut_or_err()?.add_fuel_to_vm(host)?;
422        // Metering: the `func.call` will trigger `wasmi::Call` (or `CallIndirect`) instruction,
423        // which is technically covered by wasmi fuel metering. So we are double charging a bit
424        // here (by a few 100s cpu insns). It is better to be safe.
425        let res = func.call(
426            &mut *self.store.try_borrow_mut_or_err()?,
427            inputs,
428            &mut wasm_ret,
429        );
430        // Due to the way wasmi's fuel metering works (it does `remaining.checked_sub(delta).ok_or(Trap)`),
431        // there may be a small amount of fuel (less than delta -- the fuel cost of that failing
432        // wasmi instruction) remaining when the `OutOfFuel` trap occurs. This is only observable
433        // if the contract traps with `OutOfFuel`, which may appear confusing if they look closely
434        // at the budget amount consumed. So it should be fine.
435        self.store
436            .try_borrow_mut_or_err()?
437            .return_fuel_to_host(host)?;
438
439        if let Err(e) = res {
440            use std::borrow::Cow;
441
442            // When a call fails with a wasmi::Error::Trap that carries a HostError
443            // we propagate that HostError as is, rather than producing something new.
444
445            match e {
446                wasmi::Error::Trap(trap) => {
447                    if let Some(code) = trap.trap_code() {
448                        let err = code.into();
449                        let mut msg = Cow::Borrowed("VM call trapped");
450                        host.with_debug_mode(|| {
451                            msg = Cow::Owned(format!("VM call trapped: {:?}", &code));
452                            Ok(())
453                        });
454                        return Err(host.error(err, &msg, &[func_sym.to_val()]));
455                    }
456                    if let Some(he) = trap.downcast::<HostError>() {
457                        host.log_diagnostics(
458                            "VM call trapped with HostError",
459                            &[func_sym.to_val(), he.error.to_val()],
460                        );
461                        return Err(he);
462                    }
463                    return Err(host.err(
464                        ScErrorType::WasmVm,
465                        ScErrorCode::InternalError,
466                        "VM trapped but propagation failed",
467                        &[],
468                    ));
469                }
470                e => {
471                    let mut msg = Cow::Borrowed("VM call failed");
472                    host.with_debug_mode(|| {
473                        msg = Cow::Owned(format!("VM call failed: {:?}", &e));
474                        Ok(())
475                    });
476                    return Err(host.error(e.into(), &msg, &[func_sym.to_val()]));
477                }
478            }
479        }
480        host.relative_to_absolute(
481            Val::try_marshal_from_value(wasm_ret[0].clone()).ok_or(ConversionError)?,
482        )
483    }
484
485    pub(crate) fn invoke_function_raw(
486        self: &Rc<Self>,
487        host: &Host,
488        func_sym: &Symbol,
489        args: &[Val],
490    ) -> Result<Val, HostError> {
491        let _span = tracy_span!("Vm::invoke_function_raw");
492        Vec::<Value>::charge_bulk_init_cpy(args.len() as u64, host.as_budget())?;
493        let wasm_args: Vec<Value> = args
494            .iter()
495            .map(|i| host.absolute_to_relative(*i).map(|v| v.marshal_from_self()))
496            .collect::<Result<Vec<Value>, HostError>>()?;
497        self.metered_func_call(host, func_sym, wasm_args.as_slice())
498    }
499
500    /// Returns the raw bytes content of a named custom section from the WASM
501    /// module loaded into the [Vm], or `None` if no such custom section exists.
502    pub fn custom_section(&self, name: impl AsRef<str>) -> Option<&[u8]> {
503        self.module.custom_section(name)
504    }
505
506    /// Utility function that synthesizes a `VmCaller<Host>` configured to point
507    /// to this VM's `Store` and `Instance`, and calls the provided function
508    /// back with it. Mainly used for testing.
509    pub(crate) fn with_vmcaller<F, T>(&self, f: F) -> Result<T, HostError>
510    where
511        F: FnOnce(&mut VmCaller<Host>) -> Result<T, HostError>,
512    {
513        let store: &mut Store<Host> = &mut *self.store.try_borrow_mut_or_err()?;
514        let mut ctx: StoreContextMut<Host> = store.into();
515        let caller: Caller<Host> = Caller::new(&mut ctx, Some(&self.instance));
516        let mut vmcaller: VmCaller<Host> = VmCaller(Some(caller));
517        f(&mut vmcaller)
518    }
519
520    #[cfg(feature = "bench")]
521    pub(crate) fn with_caller<F, T>(&self, f: F) -> Result<T, HostError>
522    where
523        F: FnOnce(Caller<Host>) -> Result<T, HostError>,
524    {
525        let store: &mut Store<Host> = &mut *self.store.try_borrow_mut_or_err()?;
526        let mut ctx: StoreContextMut<Host> = store.into();
527        let caller: Caller<Host> = Caller::new(&mut ctx, Some(&self.instance));
528        f(caller)
529    }
530
531    pub(crate) fn memory_hash_and_size(&self, budget: &Budget) -> Result<(u64, usize), HostError> {
532        use std::hash::Hasher;
533        if let Some(mem) = self.memory {
534            self.with_vmcaller(|vmcaller| {
535                let mut state = CountingHasher::default();
536                let data = mem.data(vmcaller.try_ref()?);
537                data.metered_hash(&mut state, budget)?;
538                Ok((state.finish(), data.len()))
539            })
540        } else {
541            Ok((0, 0))
542        }
543    }
544
545    // This is pretty weak: we just observe the state that wasmi exposes through
546    // wasm _exports_. There might be tables or globals a wasm doesn't export
547    // but there's no obvious way to observe them.
548    pub(crate) fn exports_hash_and_size(&self, budget: &Budget) -> Result<(u64, usize), HostError> {
549        use std::hash::Hasher;
550        use wasmi::{Extern, StoreContext};
551        self.with_vmcaller(|vmcaller| {
552            let ctx: StoreContext<'_, _> = vmcaller.try_ref()?.into();
553            let mut size: usize = 0;
554            let mut state = CountingHasher::default();
555            for export in self.instance.exports(vmcaller.try_ref()?) {
556                size = size.saturating_add(1);
557                export.name().metered_hash(&mut state, budget)?;
558
559                match export.into_extern() {
560                    // Funcs are immutable, memory we hash separately above.
561                    Extern::Func(_) | Extern::Memory(_) => (),
562
563                    Extern::Table(t) => {
564                        let sz = t.size(&ctx);
565                        sz.metered_hash(&mut state, budget)?;
566                        size = size.saturating_add(sz as usize);
567                        for i in 0..sz {
568                            if let Some(elem) = t.get(&ctx, i) {
569                                // This is a slight fudge to avoid having to
570                                // define a ton of additional MeteredHash impls
571                                // for wasmi substructures, since there is a
572                                // bounded size on the string representation of
573                                // a value, we're comfortable going temporarily
574                                // over budget here.
575                                let s = format!("{:?}", elem);
576                                budget.charge(ContractCostType::MemAlloc, Some(s.len() as u64))?;
577                                s.metered_hash(&mut state, budget)?;
578                            }
579                        }
580                    }
581                    Extern::Global(g) => {
582                        let s = format!("{:?}", g.get(&ctx));
583                        budget.charge(ContractCostType::MemAlloc, Some(s.len() as u64))?;
584                        s.metered_hash(&mut state, budget)?;
585                    }
586                }
587            }
588            Ok((state.finish(), size))
589        })
590    }
591}
592
/// A trait that VMs that want to work with a custom context should
/// implement.
///
/// No implementor is visible in this file, so the notes on the methods below
/// describe only what the signatures show; anything about intended behavior
/// is marked as an assumption to confirm.
pub trait CustomContextVM {
    /// NOTE(review): this method was previously documented as "Return WASMI's
    /// Memory handle", which does not match its signature (it returns
    /// nothing). Presumably it copies bytes starting at `mem_pos` into `buf`
    /// -- TODO confirm against the implementors.
    fn read(&self, mem_pos: usize, buf: &mut [u8]);

    /// Returns a shared byte-slice view; presumably the VM's linear memory --
    /// TODO confirm.
    fn data(&self) -> &[u8];

    /// Presumably writes `slice` at offset `pos`. The meaning of the returned
    /// `i64` is not visible from here -- TODO document it alongside an
    /// implementation.
    fn write(&mut self, pos: u32, slice: &[u8]) -> i64;

    /// Mutable counterpart of [`Self::data`].
    fn data_mut(&mut self) -> &mut [u8];
}