concordium_wasm/artifact.rs

//! This module defines the notion of the [`Artifact`], which is a processed and
//! instantiated module that can have its exposed methods invoked via the
//! [`Artifact::run`] method.
//!
//! The module in this section is in a format where serialization and
//! deserialization are straightforward and cheap.

use crate::{
    constants::MAX_NUM_PAGES,
    types::*,
    validate::{
        validate, Handler, HasValidationContext, LocalsRange, Reachability, ValidationState,
    },
};
use anyhow::{anyhow, bail, ensure, Context};
use derive_more::{Display, From, Into};
use std::{
    collections::{BTreeMap, BTreeSet},
    convert::{TryFrom, TryInto},
    io::Write,
    sync::Arc,
};

#[derive(Copy, Clone)]
/// Either a short or long integer.
/// The reason this is a union is that after Wasm module validation we are
/// guaranteed that the program is well typed. Since all instructions have
/// clear, fixed types, we can determine from the instruction which value we
/// expect on the stack. Using a union saves on the discriminant compared to
/// using an enum, leading to 50% less space used on the stack, and removes
/// the need to handle impossible cases.
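///
/// A sketch of intended usage (reading a union field requires `unsafe`; the
/// type discipline established by validation is what makes the read sound):
/// ```ignore
/// let v = StackValue::from(5i32);
/// // Safe only because we know an `i32` was stored.
/// let x = unsafe { v.short };
/// assert_eq!(x, 5);
/// ```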
#[repr(C)]
pub union StackValue {
    pub short: i32,
    pub long:  i64,
}

/// The debug implementation does not print the actual value. Instead it always
/// displays `StackValue`. It exists so that structures containing stack values
/// can have useful [`Debug`] implementations.
impl std::fmt::Debug for StackValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("StackValue") }
}

impl From<i32> for StackValue {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn from(short: i32) -> Self {
        Self {
            short,
        }
    }
}

impl From<u32> for StackValue {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn from(short: u32) -> Self {
        Self {
            short: short as i32,
        }
    }
}

impl From<i64> for StackValue {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn from(long: i64) -> Self {
        Self {
            long,
        }
    }
}

impl From<u64> for StackValue {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn from(long: u64) -> Self {
        Self {
            long: long as i64,
        }
    }
}

impl From<GlobalInit> for StackValue {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn from(g: GlobalInit) -> Self {
        match g {
            GlobalInit::I32(short) => Self {
                short,
            },
            GlobalInit::I64(long) => Self {
                long,
            },
        }
    }
}

#[derive(Debug, Clone)]
/// A fully instantiated table. This is possible because, in the Wasm
/// specification we support, the only way to write functions to the table is
/// via the element section of the module. Since we ensure the table is small
/// enough, we can afford to initialize it at compile time.
pub struct InstantiatedTable {
    pub functions: Vec<Option<FuncIndex>>,
}

#[derive(Debug, Clone)]
/// Fully instantiated globals with initial values.
pub struct InstantiatedGlobals {
    pub inits: Vec<GlobalInit>,
}

#[derive(Debug, Clone)]
/// The data segment of the artifact. This is a slightly processed
/// data segment of the module. In contrast to the table we cannot use
/// the same trick of initializing it here. In practice data segments
/// are at high offsets, which would lead to big artifacts. Thus we
/// store it pretty much in the same way that it was when it was part
/// of the source, except we have resolved the offset.
pub struct ArtifactData {
    /// Where to start initializing.
    pub offset: i32,
    /// The bytes to initialize with.
    pub init:   Vec<u8>,
}

impl From<Data> for ArtifactData {
    fn from(d: Data) -> Self {
        Self {
            offset: d.offset,
            init:   d.init,
        }
    }
}

#[derive(Debug, Clone)]
/// Memory of the artifact, with initial size, as well as maximum size set.
/// If the maximum size is not part of the original module we set it to
/// [constants::MAX_NUM_PAGES](../constants/constant.MAX_NUM_PAGES.html).
pub struct ArtifactMemory {
    pub init_size: u32,
    pub max_size:  u32,
    pub init:      Vec<ArtifactData>,
}

/// A local variable declaration in a function.
/// Because we know there are not going to be more than 2^16-1 locals we can
/// store multiplicity more efficiently.
#[derive(Debug, Clone, Copy)]
pub struct ArtifactLocal {
    pub(crate) multiplicity: u16,
    pub(crate) ty:           ValueType,
}

impl From<ValueType> for ArtifactLocal {
    fn from(ty: ValueType) -> Self {
        Self {
            ty,
            multiplicity: 1,
        }
    }
}

impl TryFrom<Local> for ArtifactLocal {
    type Error = anyhow::Error;

    fn try_from(value: Local) -> Result<Self, Self::Error> {
        let multiplicity = value.multiplicity.try_into()?;
        Ok(Self {
            ty: value.ty,
            multiplicity,
        })
    }
}

#[derive(Debug, Clone)]
/// A function which has been processed into a form suitable for execution.
pub struct CompiledFunction {
    type_idx:      TypeIndex,
    return_type:   BlockType,
    /// Parameters of the function.
    params:        Vec<ValueType>,
    /// Number of locals, cached, but should match what is in the
    /// locals vector below.
    num_locals:    u32,
    /// Vector of types of locals. This __does not__ include function
    /// parameters.
    locals:        Vec<ArtifactLocal>,
    /// Maximum number of locations needed. This includes parameters,
    /// locals, and any extra locations needed to preserve values.
    num_registers: u32,
    /// The constants in the function.
    constants:     Vec<i64>,
    code:          Instructions,
}

#[derive(Debug)]
/// A borrowed variant of [CompiledFunction](./struct.CompiledFunction.html)
/// that does not own the body and locals. This is used to make deserialization
/// of artifacts cheaper.
pub struct CompiledFunctionBytes<'a> {
    pub(crate) type_idx:      TypeIndex,
    pub(crate) return_type:   BlockType,
    pub(crate) params:        &'a [ValueType],
    /// Vector of types of locals. This __does not__ include
    /// parameters.
    /// FIXME: It would be ideal to have this as a zero-copy structure,
    /// but it likely does not matter, and it would be more error-prone.
    pub(crate) num_locals:    u32,
    pub(crate) locals:        Vec<ArtifactLocal>,
    /// Maximum number of locations needed. This includes parameters,
    /// locals, and any extra locations needed to preserve values.
    pub(crate) num_registers: u32,
    /// The constants in the function. In principle we could make this zero-copy
    /// (with some complexity due to alignment) but the added complexity is
    /// not worth it.
    pub(crate) constants:     Vec<i64>,
    pub(crate) code:          &'a [u8],
}

impl<'a> From<CompiledFunctionBytes<'a>> for CompiledFunction {
    fn from(cfb: CompiledFunctionBytes<'a>) -> Self {
        Self {
            type_idx:      cfb.type_idx,
            return_type:   cfb.return_type,
            params:        cfb.params.to_vec(),
            num_locals:    cfb.num_locals,
            locals:        cfb.locals,
            num_registers: cfb.num_registers,
            constants:     cfb.constants,
            code:          cfb.code.to_vec().into(),
        }
    }
}

/// Try to process an import into something that is perhaps more suitable for
/// execution, i.e., quicker to resolve.
pub trait TryFromImport: Sized {
    fn try_from_import(ty: &[FunctionType], import: Import) -> CompileResult<Self>;
    fn ty(&self) -> &FunctionType;
}

/// An example of a processed import with minimal processing. Useful for testing
/// and experimenting, but not for efficient execution.
#[derive(Debug, Clone, Display)]
#[display(fmt = "{}.{}", mod_name, item_name)]
pub struct ArtifactNamedImport {
    pub(crate) mod_name:  Name,
    pub(crate) item_name: Name,
    pub(crate) ty:        FunctionType,
}

impl ArtifactNamedImport {
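    /// Check whether the import matches the given module and item names.
    /// A sketch of intended usage (`import` is a hypothetical value, since
    /// [`ArtifactNamedImport`] can only be constructed inside this crate):
    /// ```ignore
    /// // An import of `env.accept` matches exactly that pair of names.
    /// assert!(import.matches("env", "accept"));
    /// assert!(!import.matches("env", "reject"));
    /// ```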
    pub fn matches(&self, mod_name: &str, item_name: &str) -> bool {
        self.mod_name.as_ref() == mod_name && self.item_name.as_ref() == item_name
    }

    pub fn get_mod_name(&self) -> &str { self.mod_name.as_ref() }

    pub fn get_item_name(&self) -> &str { self.item_name.as_ref() }
}

impl TryFromImport for ArtifactNamedImport {
    fn try_from_import(ty: &[FunctionType], import: Import) -> CompileResult<Self> {
        match import.description {
            ImportDescription::Func {
                type_idx,
            } => {
                let ty = ty
                    .get(type_idx as usize)
                    .ok_or_else(|| anyhow!("Unknown type index."))?
                    .clone();
                Ok(Self {
                    mod_name: import.mod_name,
                    item_name: import.item_name,
                    ty,
                })
            }
        }
    }

    fn ty(&self) -> &FunctionType { &self.ty }
}

/// An iterator over local variables.
pub struct LocalsIterator<'a> {
    /// Number of locals that are still going to be yielded from the iterator.
    remaining_items:      u32,
    pub(crate) locals:    &'a [ArtifactLocal],
    /// Current position in the locals list. Each local in the list can have a
    /// multiplicity. This is the shorthand Wasm uses for declaring multiple
    /// local variables of the same type.
    current_item:         usize,
    /// Current multiplicity of the `current_item`.
    /// When advancing the iterator we keep increasing this until we exhaust the
    /// local.
    current_multiplicity: u16,
}

impl<'a> LocalsIterator<'a> {
    /// Construct a new iterator given the total number of locals and a list of
    /// locals with multiplicity. The total number of locals must be supplied so
    /// that we don't have to go through the entire list of locals and sum up
    /// their multiplicities.
    pub fn new(num_locals: u32, locals: &'a [ArtifactLocal]) -> Self {
        Self {
            remaining_items: num_locals,
            locals,
            current_item: 0,
            current_multiplicity: 0,
        }
    }
}
impl<'a> Iterator for LocalsIterator<'a> {
    type Item = ValueType;

    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining_items == 0 {
            return None;
        }
        let al = self.locals.get(self.current_item)?;
        if self.current_multiplicity < al.multiplicity {
            self.current_multiplicity += 1;
            // Only count an item once it is actually yielded, so that
            // `size_hint` and `len` stay accurate during iteration.
            self.remaining_items -= 1;
            Some(al.ty)
        } else {
            self.current_item += 1;
            self.current_multiplicity = 0;
            self.next()
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.remaining_items as usize, Some(self.remaining_items as usize))
    }
}

impl<'a> ExactSizeIterator for LocalsIterator<'a> {
    fn len(&self) -> usize { self.remaining_items as usize }
}
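
// A minimal sketch of how `LocalsIterator` expands multiplicities, written as
// an in-file test so it can use crate-private fields. It assumes, as in
// `types.rs`, that `ValueType` has `I32`/`I64` variants and implements
// `PartialEq` and `Debug`.
#[cfg(test)]
mod locals_iterator_example {
    use super::*;

    #[test]
    fn expands_multiplicities() {
        // Two declarations with multiplicities 2 and 1 expand to three locals.
        let locals = [
            ArtifactLocal {
                ty:           ValueType::I32,
                multiplicity: 2,
            },
            ArtifactLocal {
                ty:           ValueType::I64,
                multiplicity: 1,
            },
        ];
        let iter = LocalsIterator::new(3, &locals);
        assert_eq!(iter.len(), 3);
        let collected: Vec<ValueType> = iter.collect();
        assert_eq!(collected, vec![ValueType::I32, ValueType::I32, ValueType::I64]);
    }
}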

/// A trait encapsulating the properties that are needed to run a function.
/// This trait exists because we run two different kinds of code: fully
/// deserialized code, where instructions are essentially a
/// `Vec<InternalOpcode>`, or code executed directly from `&[u8]` when the
/// origin of the code is a serialized structure, such as an [`Artifact`]
/// retrieved from a database.
pub trait RunnableCode {
    /// The number of parameters of the function.
    fn num_params(&self) -> u32;
    /// The number of registers the function needs in the worst case.
    /// This includes locals and parameters.
    fn num_registers(&self) -> u32;
    /// The distinct constants that appear in the function body.
    fn constants(&self) -> &[i64];
    /// The type of the function, as an index into the list of types of the
    /// module.
    fn type_idx(&self) -> TypeIndex;
    /// The return type of the function.
    fn return_type(&self) -> BlockType;
    /// The types of function parameters.
    fn params(&self) -> &[ValueType];
    /// The number of locals declared by the function. This **does not** include
    /// the function parameters, only declared locals.
    fn num_locals(&self) -> u32;
    /// An iterator over the locals (not including function parameters).
    fn locals(&self) -> LocalsIterator<'_>;
    /// A reference to the instructions to execute.
    fn code(&self) -> &[u8];
}

impl RunnableCode for CompiledFunction {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_params(&self) -> u32 { self.params.len() as u32 }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_registers(&self) -> u32 { self.num_registers }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn constants(&self) -> &[i64] { &self.constants }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn type_idx(&self) -> TypeIndex { self.type_idx }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn return_type(&self) -> BlockType { self.return_type }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn params(&self) -> &[ValueType] { &self.params }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_locals(&self) -> u32 { self.num_locals }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn locals(&self) -> LocalsIterator { LocalsIterator::new(self.num_locals, &self.locals) }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn code(&self) -> &[u8] { &self.code.bytes }
}

impl<'a> RunnableCode for CompiledFunctionBytes<'a> {
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_params(&self) -> u32 { self.params.len() as u32 }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_registers(&self) -> u32 { self.num_registers }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn constants(&self) -> &[i64] { &self.constants }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn type_idx(&self) -> TypeIndex { self.type_idx }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn return_type(&self) -> BlockType { self.return_type }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn params(&self) -> &[ValueType] { self.params }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn num_locals(&self) -> u32 { self.num_locals }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn locals(&self) -> LocalsIterator { LocalsIterator::new(self.num_locals, &self.locals) }

    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    fn code(&self) -> &[u8] { self.code }
}

/// Version of the artifact. We only support one version at present in this
/// library, version 1, but older versions of the library supported a different
/// version, so this versioning allows us to detect those older versions and
/// apply migration as needed.
///
/// The artifact is always serialized such that it starts with a 4-byte version
/// prefix, which enables us to detect older versions while only supporting the
/// new version in the library.
#[derive(Debug, Clone, Copy)]
pub enum ArtifactVersion {
    /// A more efficient instruction set representation that precompiles the
    /// stack machine into a "register based" one where there is no more
    /// stack during execution.
    V1,
}

/// A processed Wasm module. This no longer has custom sections since they are
/// not needed for further processing.
/// The type parameter `ImportFunc` is instantiated with the representation of
/// host functions. To efficiently and relatively safely execute the module we
/// preprocess imported functions into an enum. However for testing we sometimes
/// just use raw imports. This type parameter allows us flexibility.
///
/// The type parameter `CompiledCode` is used to allow flexibility in code
/// representation. For testing uses it is convenient that the type is
/// "owned", in the sense of it being a vector of instructions. For efficient
/// execution, and to avoid deserialization, the code is represented as a byte
/// array (i.e., as a slice of bytes `&[u8]`) when we execute it after looking
/// the code up from the database.
#[derive(Debug, Clone)]
pub struct Artifact<ImportFunc, CompiledCode> {
    pub version: ArtifactVersion,
    /// Imports by (module name, item name).
    pub imports: Vec<ImportFunc>,
    /// Types of the module. These are needed for dynamic dispatch, i.e.,
    /// call-indirect.
    pub ty:      Vec<FunctionType>,
    /// A fully instantiated table.
    pub table:   InstantiatedTable,
    /// The memory of the artifact.
    pub memory:  Option<ArtifactMemory>,
    /// Globals initialized with initial values.
    pub global:  InstantiatedGlobals,
    /// The exported functions.
    /// Validation should ensure that an exported function is a defined one,
    /// and not one of the imported ones.
    /// Thus the index refers to the index in the code section.
    pub export:  BTreeMap<Name, FuncIndex>,
    /// The list of functions in the module.
    pub code:    Vec<CompiledCode>,
}

/// An artifact which does not own the code to run. The code is only a
/// reference to a byte array.
pub type BorrowedArtifact<'a, ImportFunc> = Artifact<ImportFunc, CompiledFunctionBytes<'a>>;
/// An artifact that owns the code to run.
pub type OwnedArtifact<ImportFunc> = Artifact<ImportFunc, CompiledFunction>;

/// Convert a borrowed artifact to an owned one. This allocates memory for all
/// the code of the artifact so it should be used sparingly.
impl<'a, ImportFunc> From<BorrowedArtifact<'a, ImportFunc>> for OwnedArtifact<ImportFunc> {
    fn from(a: BorrowedArtifact<'a, ImportFunc>) -> Self {
        let Artifact {
            version,
            imports,
            ty,
            table,
            memory,
            global,
            export,
            code,
        } = a;
        Self {
            version,
            imports,
            ty,
            table,
            memory,
            global,
            export,
            code: code.into_iter().map(CompiledFunction::from).collect::<Vec<_>>(),
        }
    }
}

/// Convert a borrowed artifact to an owned one inside an `Arc`. This allocates
/// memory for all the code of the artifact so it should be used sparingly.
impl<'a, ImportFunc> From<BorrowedArtifact<'a, ImportFunc>> for Arc<OwnedArtifact<ImportFunc>> {
    fn from(a: BorrowedArtifact<'a, ImportFunc>) -> Self { Arc::new(a.into()) }
}

/// Internal opcode. This is mostly the same as [`OpCode`], but with control
/// instructions resolved to jumps in the instruction sequence, and function
/// calls processed.
#[repr(u8)]
#[derive(Debug, num_enum::TryFromPrimitive)]
pub enum InternalOpcode {
    // Control instructions
    Unreachable = 0u8,
    If,
    Br,
    BrIf,
    BrTable,
    BrTableCarry,
    Return,
    Call,
    TickEnergy,
    CallIndirect,

    // Parametric instructions
    Select,

    // Variable instructions
    GlobalGet,
    GlobalSet,

    // Memory instructions
    I32Load,
    I64Load,
    I32Load8S,
    I32Load8U,
    I32Load16S,
    I32Load16U,
    I64Load8S,
    I64Load8U,
    I64Load16S,
    I64Load16U,
    I64Load32S,
    I64Load32U,
    I32Store,
    I64Store,
    I32Store8,
    I32Store16,
    I64Store8,
    I64Store16,
    I64Store32,
    MemorySize,
    MemoryGrow,

    I32Eqz,
    I32Eq,
    I32Ne,
    I32LtS,
    I32LtU,
    I32GtS,
    I32GtU,
    I32LeS,
    I32LeU,
    I32GeS,
    I32GeU,
    I64Eqz,
    I64Eq,
    I64Ne,
    I64LtS,
    I64LtU,
    I64GtS,
    I64GtU,
    I64LeS,
    I64LeU,
    I64GeS,
    I64GeU,

    I32Clz,
    I32Ctz,
    I32Popcnt,
    I32Add,
    I32Sub,
    I32Mul,
    I32DivS,
    I32DivU,
    I32RemS,
    I32RemU,
    I32And,
    I32Or,
    I32Xor,
    I32Shl,
    I32ShrS,
    I32ShrU,
    I32Rotl,
    I32Rotr,
    I64Clz,
    I64Ctz,
    I64Popcnt,
    I64Add,
    I64Sub,
    I64Mul,
    I64DivS,
    I64DivU,
    I64RemS,
    I64RemU,
    I64And,
    I64Or,
    I64Xor,
    I64Shl,
    I64ShrS,
    I64ShrU,
    I64Rotl,
    I64Rotr,

    I32WrapI64,
    I64ExtendI32S,
    I64ExtendI32U,

    // Sign extension instructions, optionally supported depending on the protocol version.
    I32Extend8S,
    I32Extend16S,
    I64Extend8S,
    I64Extend16S,
    I64Extend32S,

    Copy,
}
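
// A small sketch of the `u8` round-trip that the serialized artifact format
// relies on: `num_enum::TryFromPrimitive` derives `TryFrom<u8>` for
// `InternalOpcode`, so any byte past the last variant fails to decode.
#[cfg(test)]
mod internal_opcode_example {
    use super::*;
    use std::convert::TryFrom;

    #[test]
    fn opcode_roundtrip() {
        let byte = InternalOpcode::Unreachable as u8;
        assert_eq!(byte, 0);
        assert!(matches!(InternalOpcode::try_from(byte), Ok(InternalOpcode::Unreachable)));
        // 255 is well past the last variant, so it does not decode.
        assert!(InternalOpcode::try_from(255u8).is_err());
    }
}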

/// Result of compilation. Either `Ok(_)` or an error indicating the reason.
pub type CompileResult<A> = anyhow::Result<A>;

#[derive(Default, Debug, Clone, From, Into)]
/// A sequence of internal opcodes, followed by any immediate arguments.
pub struct Instructions {
    pub(crate) bytes: Vec<u8>,
}

impl Instructions {
    fn push(&mut self, opcode: InternalOpcode) { self.bytes.push(opcode as u8) }

    fn push_u16(&mut self, x: u16) { self.bytes.extend_from_slice(&x.to_le_bytes()); }

    fn push_u32(&mut self, x: u32) { self.bytes.extend_from_slice(&x.to_le_bytes()); }

    fn push_i32(&mut self, x: i32) { self.bytes.extend_from_slice(&x.to_le_bytes()); }

    fn current_offset(&self) -> usize { self.bytes.len() }

    fn back_patch(&mut self, back_loc: usize, to_write: u32) -> CompileResult<()> {
        let mut place: &mut [u8] = &mut self.bytes[back_loc..];
        place.write_all(&to_write.to_le_bytes())?;
        Ok(())
    }
}
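
// A minimal sketch of the emit-then-back-patch pattern used for forward
// jumps: emit the opcode, remember the offset of a 4-byte dummy target, and
// overwrite it once the target is known. Offsets here are illustrative only.
#[cfg(test)]
mod instructions_backpatch_example {
    use super::*;

    #[test]
    fn back_patch_overwrites_dummy_target() -> CompileResult<()> {
        let mut ins = Instructions::default();
        ins.push(InternalOpcode::Br);
        // Remember where the 4-byte target placeholder starts.
        let loc = ins.current_offset();
        ins.push_u32(0); // dummy value, patched once the target is known
        // ... the body of the block would be emitted here ...
        let target = ins.current_offset() as u32;
        ins.back_patch(loc, target)?;
        assert_eq!(ins.bytes[loc..loc + 4], target.to_le_bytes());
        Ok(())
    }
}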

/// Target of a jump that we need to keep track of temporarily.
#[derive(Debug)]
enum JumpTarget {
    /// We know the position in the instruction sequence where the jump should
    /// resolve to. This is used in the case of loops since jumps to a loop
    /// block jump to the beginning of the block.
    Known {
        pos: usize,
    },
    /// We do not yet know where in the instruction sequence we will jump to.
    /// We record the list of places at which we need to back-patch the location
    /// when we get to it.
    Unknown {
        /// List of locations where we need to insert the target location of
        /// the jump after this is determined.
        backpatch_locations: Vec<usize>,
        /// If the return type of the block is a value type (not `EmptyType`)
        /// then this is the location where the value must be available
        /// after every exit (either via jump or via terminating the block by
        /// reaching the end of execution) of the block.
        result:              Option<Provider>,
    },
}

impl JumpTarget {
    /// Insert a new jump target with unknown location (this is the case when
    /// entering a `block` or an `if`, but not a `loop`). The `result` indicates
    /// where the result of the block should be available after every exit of
    /// the block. It is `None` if the block's return type is `EmptyType`.
    pub fn new_unknown(result: Option<Provider>) -> Self {
        JumpTarget::Unknown {
            backpatch_locations: Vec::new(),
            result,
        }
    }

    /// Similar to [`new_unknown`](Self::new_unknown) except we already know one
    /// location at which we will need to backpatch.
    pub fn new_unknown_loc(pos: usize, result: Option<Provider>) -> Self {
        JumpTarget::Unknown {
            backpatch_locations: vec![pos],
            result,
        }
    }

    /// Construct a new known location `JumpTarget`.
    pub fn new_known(pos: usize) -> Self {
        JumpTarget::Known {
            pos,
        }
    }
}

#[derive(Default)]
/// Stack of jump targets.
struct BackPatchStack {
    stack: Vec<JumpTarget>,
}

impl BackPatchStack {
    pub fn push(&mut self, target: JumpTarget) { self.stack.push(target) }

    pub fn pop(&mut self) -> CompileResult<JumpTarget> {
        self.stack.pop().ok_or_else(|| anyhow!("Attempt to pop from an empty backpatch stack."))
    }

    pub fn get_mut(&mut self, n: LabelIndex) -> CompileResult<&mut JumpTarget> {
        ensure!(
            (n as usize) < self.stack.len(),
            "Attempt to access label beyond the size of the stack."
        );
        let lookup_idx = self.stack.len() - n as usize - 1;
        self.stack.get_mut(lookup_idx).ok_or_else(|| anyhow!("Attempt to access unknown label."))
    }

    pub fn get(&self, n: LabelIndex) -> CompileResult<&JumpTarget> {
        ensure!(
            (n as usize) < self.stack.len(),
            "Attempt to access label beyond the size of the stack."
        );
        let lookup_idx = self.stack.len() - n as usize - 1;
        self.stack.get(lookup_idx).ok_or_else(|| anyhow!("Attempt to access unknown label."))
    }
}
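
// A sketch of the relative label addressing used above: label 0 always
// denotes the innermost (most recently pushed) jump target, mirroring Wasm's
// label indices. This assumes `LabelIndex` is a plain integer index, as
// defined in `types.rs`.
#[cfg(test)]
mod backpatch_stack_example {
    use super::*;

    #[test]
    fn label_zero_is_innermost() -> CompileResult<()> {
        let mut stack = BackPatchStack::default();
        stack.push(JumpTarget::new_known(10));
        stack.push(JumpTarget::new_known(20));
        // Label 0 refers to the target pushed last.
        assert!(matches!(stack.get(0)?, JumpTarget::Known { pos: 20 }));
        assert!(matches!(stack.get(1)?, JumpTarget::Known { pos: 10 }));
        // Indices at or beyond the stack size are rejected.
        assert!(stack.get(2).is_err());
        Ok(())
    }
}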

/// A generator of dynamic locations needed in addition to the locals during
/// execution.
struct DynamicLocations {
    /// The next location to give out if there are no reusable locations.
    next_location:      i32,
    /// A set of locations that are available for use again.
    /// The two operations that are needed are getting a location out,
    /// and returning it for reuse. We choose a set here so that we can also
    /// always return the smallest location. Which location is reused first
    /// is not relevant for correctness. It might affect performance a bit due
    /// to memory locality, so reusing smaller locations should be better (since
    /// locals are also small locations), but that's going to be very case
    /// specific.
    reusable_locations: BTreeSet<i32>,
}

impl DynamicLocations {
    pub fn new(next_location: i32) -> Self {
        Self {
            next_location,
            reusable_locations: BTreeSet::new(),
        }
    }

    /// Inform that a given location, if it is a dynamic location, may be used
    /// again.
    pub fn reuse(&mut self, provider: Provider) {
        if let Provider::Dynamic(idx) = provider {
            self.reusable_locations.insert(idx);
        }
    }

    /// Get the next available location.
    pub fn get(&mut self) -> i32 {
        if let Some(idx) = self.reusable_locations.pop_first() {
            idx
        } else {
            let idx = self.next_location;
            self.next_location += 1;
            idx
        }
    }
}
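
// A sketch of the recycling behaviour: locations are handed out sequentially
// starting from `next_location`, and `reuse` returns dynamic locations to a
// pool from which the smallest is handed out first.
#[cfg(test)]
mod dynamic_locations_example {
    use super::*;

    #[test]
    fn reuses_smallest_location_first() {
        let mut locs = DynamicLocations::new(10);
        assert_eq!(locs.get(), 10);
        assert_eq!(locs.get(), 11);
        assert_eq!(locs.get(), 12);
        locs.reuse(Provider::Dynamic(11));
        locs.reuse(Provider::Dynamic(10));
        // Locals are never recycled; only dynamic locations are pooled.
        locs.reuse(Provider::Local(3));
        assert_eq!(locs.get(), 10); // smallest reusable location first
        assert_eq!(locs.get(), 11);
        assert_eq!(locs.get(), 13); // pool exhausted, fresh location
    }
}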

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
/// A provider of a value for an instruction.
/// This is a disjoint union of locations:
/// - locals, which use indices `0..num_locals` (including parameters)
/// - dynamic locations, which use indices `num_locals..num_registers`
/// - constants, which use negative indices from `-1` down to minus the number
///   of constants in the function.
enum Provider {
    /// The provider is a dynamic location, not one of the locals.
    Dynamic(i32),
    /// A provider is a local declared in the Wasm code.
    Local(i32),
    /// The provider is a constant embedded in the code.
    Constant(i32),
}

/// A stack of providers that is used to statically resolve locations where
/// instructions will receive arguments.
struct ProvidersStack {
    /// The stack of providers. This is meant to correspond to the validation
    /// stack but instead of types it has locations of where the value for
    /// the instruction will be found at runtime.
    stack:             Vec<Provider>,
    /// An auxiliary structure used to keep track of available dynamic
    /// locations. These locations are recycled when they no longer appear
    /// on the stack.
    dynamic_locations: DynamicLocations,
    /// A mapping of constant values to their locations. The map is used to
    /// deduplicate constants embedded in the code.
    ///
    /// Note that we coerce i32 constants to i64 constants and only keep track
    /// of i64 values.
    constants:         BTreeMap<i64, i32>,
}

impl ProvidersStack {
    /// Construct a new [`ProvidersStack`] given the total number of locals
    /// (parameters and declared locals).
    pub fn new(num_locals: u32, return_type: Option<ValueType>) -> Self {
        // We'll always write the return value of the function, if there is one,
        // to location 0. So we make sure this location exists even if the
        // function has no parameters or locals.
        let next_location = if num_locals == 0 && return_type.is_some() {
            1
        } else {
            num_locals
        };
        let dynamic_locations = DynamicLocations::new(next_location as i32);
        Self {
            stack: Vec::new(),
            dynamic_locations,
            constants: BTreeMap::new(),
        }
    }

    /// Consume a provider from the top of the stack and recycle its location
    /// in case it was a dynamic location.
    pub fn consume(&mut self) -> CompileResult<Provider> {
        let operand = self.stack.pop().context("Missing operand for consume")?;
        // This is kind of expensive to run for each consume and we could do
        // better by having a map of used locations to their place on the stack.
        //
        // However, the benchmark using the validation-time-consume.wat module
        // indicates that performance is not an issue. We can optimize this in the
        // future without breaking changes if we want to improve validation
        // performance a couple of percent.
        let is_unused = self.stack.iter().all(|x| *x != operand);
        if is_unused {
            self.dynamic_locations.reuse(operand);
        }
        Ok(operand)
    }

    /// Generate a fresh dynamic location and return its index.
    pub fn provide(&mut self) -> i32 {
        let result = self.dynamic_locations.get();
        self.stack.push(Provider::Dynamic(result));
        result
    }

    /// Push an existing provider to the providers stack.
    pub fn provide_existing(&mut self, provider: Provider) {
        self.stack.push(provider);
        // Make sure that if the provider is on the stack it's not
        // free for use. We rely on the property that locations
        // returned from `dynamic_locations.get` are fresh.
        if let Provider::Dynamic(idx) = provider {
            self.dynamic_locations.reusable_locations.remove(&idx);
        }
    }

    /// Return the number of values on the provider stack.
    pub fn len(&self) -> usize { self.stack.len() }

    /// Push a constant onto the provider stack.
    pub fn push_constant(&mut self, c: i64) -> CompileResult<()> {
        let next = -i32::try_from(self.constants.len())? - 1;
        let idx = self.constants.entry(c).or_insert(next);
        self.stack.push(Provider::Constant(*idx));
        Ok(())
    }

    /// Truncate the provider stack to be **at most** `new_len` elements.
    /// All the extra values are available for reuse.
    pub fn truncate(&mut self, new_len: usize) -> CompileResult<()> {
        let drop_len = self.stack.len().saturating_sub(new_len);
        for _ in 0..drop_len {
            self.consume()?;
        }
        Ok(())
    }
}
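
// A sketch of constant handling in the providers stack: constants are
// deduplicated and addressed with negative indices, while fresh dynamic
// locations start just past the locals.
#[cfg(test)]
mod providers_stack_example {
    use super::*;

    #[test]
    fn constants_are_deduplicated() -> CompileResult<()> {
        let mut stack = ProvidersStack::new(2, None);
        stack.push_constant(42)?;
        stack.push_constant(7)?;
        stack.push_constant(42)?; // same value, same slot
        assert_eq!(stack.consume()?, Provider::Constant(-1));
        assert_eq!(stack.consume()?, Provider::Constant(-2));
        assert_eq!(stack.consume()?, Provider::Constant(-1));
        // With two locals, the first dynamic location is index 2.
        assert_eq!(stack.provide(), 2);
        Ok(())
    }
}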

/// An intermediate structure of the instruction sequence plus any pending
/// backpatch locations we need to resolve.
struct BackPatch {
    out:              Instructions,
    /// The list of locations we need to backpatch, that is, insert jump
    /// locations at once we know where those jumps end, which is typically at
    /// the `End` of a block.
    backpatch:        BackPatchStack,
    /// The provider stack. This mimics the operand stack but it additionally
    /// records the register locations where the values are available for
    /// instructions so that those registers can be added as immediate
    /// arguments to instructions.
    providers_stack:  ProvidersStack,
    /// The return type of the function.
    return_type:      Option<ValueType>,
    /// If the last instruction produced something in the dynamic area,
    /// record the location here so we can short-circuit the `LocalSet` that
    /// immediately follows such an instruction.
    last_provide_loc: Option<usize>,
}

impl BackPatch {
    /// Construct a new instance. The `num_locals` argument is the number of
    /// locals (this includes parameters + declared locals). The number of
    /// locals is assumed to be within bounds ensured by the validation.
    fn new(num_locals: u32, return_type: Option<ValueType>) -> Self {
        Self {
            out: Default::default(),
            backpatch: BackPatchStack {
                // The return value of a function, if any, will always be at index 0.
                stack: vec![JumpTarget::new_unknown(return_type.map(|_| RETURN_VALUE_LOCATION))],
            },
            providers_stack: ProvidersStack::new(num_locals, return_type),
            return_type,
            last_provide_loc: None,
        }
    }

    /// Write a provider into the output buffer.
    fn push_loc(&mut self, loc: Provider) {
        match loc {
            Provider::Dynamic(idx) => {
                self.out.push_i32(idx);
            }
            Provider::Constant(idx) => {
                self.out.push_i32(idx);
            }
            Provider::Local(idx) => {
                self.out.push_i32(idx);
            }
        }
    }

    /// Record a jump to the given label. If the location is already known (if
    /// the target is a loop) then just record it immediately. Otherwise
    /// insert a dummy value and record the location to "backpatch" when we
    /// discover where the jump should end up in the instruction sequence.
    fn insert_jump_location(&mut self, label_idx: LabelIndex) -> CompileResult<()> {
        let target = self.backpatch.get_mut(label_idx)?;
        match target {
            JumpTarget::Known {
                pos,
                ..
            } => {
                self.out.push_u32(*pos as u32);
            }
            JumpTarget::Unknown {
                backpatch_locations,
                ..
            } => {
                // output a dummy value that will be backpatched after we pass the End of the
                // block
                backpatch_locations.push(self.out.current_offset());
                self.out.push_u32(0u32);
            }
        }
        Ok(())
    }

    /// Push a jump that is executed as a result of a `BrIf` instruction.
    fn push_br_if_jump(&mut self, label_idx: LabelIndex) -> CompileResult<()> {
        let target = self.backpatch.get(label_idx)?;
        // Before a jump we must make sure that whenever execution ends up at
        // the target label the value is always in the expected location.
        if let JumpTarget::Unknown {
            result: Some(result),
            ..
        } = target
        {
            let result = *result;
            let provider = self.providers_stack.consume()?;
            if provider != result {
                self.out.push(InternalOpcode::Copy);
                self.push_loc(provider);
                self.push_loc(result);
            }
            // After BrIf we need to potentially keep executing,
            // so keep the provider on the stack. But because we inserted a copy
            // the correct value is `result` on the top of the stack.
            self.providers_stack.provide_existing(result);
        }
        self.out.push(InternalOpcode::BrIf);
        self.insert_jump_location(label_idx)?;
        Ok(())
    }

    /// Push a jump that is the result of a `Br` instruction.
    fn push_br_jump(
        &mut self,
        instruction_reachable: bool,
        label_idx: LabelIndex,
    ) -> CompileResult<()> {
        let target = self.backpatch.get(label_idx)?;
        // Before a jump we must make sure that whenever execution ends up at
        // the target label the value is always in the expected location.
        //
        // If we are in the unreachable section then the instruction
        // is never going to be reached, so there is no point in inserting
        // the Copy instruction.
        if instruction_reachable {
            if let JumpTarget::Unknown {
                backpatch_locations: _,
                result: Some(result),
            } = target
            {
                let result = *result;
                let provider = self.providers_stack.consume()?;
                if provider != result {
                    self.out.push(InternalOpcode::Copy);
                    self.push_loc(provider);
                    self.push_loc(result);
                }
            }
        }
        self.out.push(InternalOpcode::Br);
        self.insert_jump_location(label_idx)?;
        Ok(())
    }

    /// Push a jump to the location specified by the `label_idx`.
    /// Used when compiling BrTable.
    fn push_br_table_jump(&mut self, label_idx: LabelIndex) -> CompileResult<()> {
        let target = self.backpatch.get(label_idx)?;
        if let JumpTarget::Unknown {
            backpatch_locations: _,
            result: Some(result),
        } = target
        {
            // When compiling a jump to a target which expects a value we
            // insert here the expected location of the value.
            // This is used by the BrTableCarry instruction handler to do a Copy
            // before jumping.
            let result = *result;
            self.push_loc(result);
        }
        self.insert_jump_location(label_idx)?;
        Ok(())
    }
    /// Push a binary operation, consuming two values and providing the result.
    fn push_binary(&mut self, opcode: InternalOpcode) -> CompileResult<()> {
        self.out.push(opcode);
        let _ = self.push_consume()?;
        let _ = self.push_consume()?;
        self.push_provide();
        Ok(())
    }

    /// Push a ternary operation, consuming three values and providing the result.
    fn push_ternary(&mut self, opcode: InternalOpcode) -> CompileResult<()> {
        self.out.push(opcode);
        let _ = self.push_consume()?;
        let _ = self.push_consume()?;
        let _ = self.push_consume()?;
        self.push_provide();
        Ok(())
    }

    /// Push a memory load, consuming the address operand and providing the
    /// loaded value.
    fn push_mem_load(&mut self, opcode: InternalOpcode, memarg: &MemArg) -> CompileResult<()> {
        self.out.push(opcode);
        self.out.push_u32(memarg.offset);
        let _operand = self.push_consume()?;
        self.push_provide();
        Ok(())
    }

    /// Push a memory store, consuming the value to store and the address
    /// operand. Stores provide no result.
    fn push_mem_store(&mut self, opcode: InternalOpcode, memarg: &MemArg) -> CompileResult<()> {
        self.out.push(opcode);
        self.out.push_u32(memarg.offset);
        let _value = self.push_consume()?;
        let _location = self.push_consume()?;
        Ok(())
    }

    /// Push a unary operation, consuming one value and providing the result.
    fn push_unary(&mut self, opcode: InternalOpcode) -> CompileResult<()> {
        self.out.push(opcode);
        let _operand = self.push_consume()?;
        self.push_provide();
        Ok(())
    }

    /// Write a newly generated location to the output buffer.
    /// This also records the location into `last_provide_loc` so that if
    /// this instruction is followed by a `LocalSet` (or `LocalTee`) instruction
    /// the `LocalSet` is short-circuited in the sense that we tell the
    /// preceding instruction to directly write the value into the local (as
    /// long as this is safe, see the handling of the `LocalSet` instruction).
    fn push_provide(&mut self) {
        let result = self.providers_stack.provide();
        self.last_provide_loc = Some(self.out.current_offset());
        self.out.push_i32(result);
    }

    /// Consume an operand and return the slot that was consumed.
    fn push_consume(&mut self) -> CompileResult<Provider> {
        let operand = self.providers_stack.consume()?;
        self.push_loc(operand);
        Ok(operand)
    }
}
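
// For intuition, `push_binary` lays out an operation such as `i32.add` as one
// opcode byte followed by three little-endian 4-byte register indices (a
// sketch; the interpreter defines the exact operand order):
//
//   [I32Add][operand from top of stack][second operand][result location]
//
// The top-of-stack operand is emitted first because providers are consumed
// from the top of the provider stack.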

/// We make a choice that we always have the return value of the function
/// available at index 0.
const RETURN_VALUE_LOCATION: Provider = Provider::Local(0);

impl<Ctx: HasValidationContext> Handler<Ctx, &OpCode> for BackPatch {
    type Outcome = (Instructions, i32, Vec<i64>);

    fn handle_opcode(
        &mut self,
        ctx: &Ctx,
        state: &ValidationState,
        reachability: Reachability,
        opcode: &OpCode,
    ) -> CompileResult<()> {
        use InternalOpcode::*;
        // The last location where the provider wrote the result.
        // This is used to short-circuit LocalSet.
        let last_provide = self.last_provide_loc.take();
        // Short-circuit the handling if the instruction is definitely not reachable.
        // Otherwise record whether the instruction is directly reachable, or only
        // reachable through a jump (which is only the case if it is the end of a
        // block, i.e., an End or Else instruction).
        let instruction_reachable = match reachability {
            Reachability::UnreachableFrame => return Ok(()),
            Reachability::UnreachableInstruction
            // Else and End instructions can be reached even in
            // unreachable segments because they can be a target of a jump.
                if !matches!(opcode, OpCode::Else | OpCode::End) =>
            {
                return Ok(())
            }
            Reachability::UnreachableInstruction => false,
            Reachability::Reachable => true,
        };
        match opcode {
            OpCode::End => {
                let jump_target = self.backpatch.pop()?;
                match jump_target {
                    JumpTarget::Known {
                        ..
                    } => {
                        // This is the end of a loop, meaning the only way we
                        // can get to this location is by executing
                        // instructions in sequence. This end cannot be jumped
                        // to.
                        //
                        // But if this is not reachable then we need to correct the
                        // stack to be of the correct size by potentially pushing a dummy
                        // value to it, since after entering an unreachable segment there is
                        // no guarantee that there is a value on the provider stack, which
                        // would break the assumption that the provider stack is the same
                        // length as the operand stack.
                        //
                        // Note that the operand stack is already updated at this point.
                        if !instruction_reachable
                            && self.providers_stack.len() < state.opds.stack.len()
                        {
                            self.providers_stack.provide();
                        }
                    }
                    JumpTarget::Unknown {
                        backpatch_locations,
                        result,
                    } => {
                        // Insert an additional Copy instruction if the top of the provider stack
                        // does not match what is expected.
                        if let Some(result) = result {
                            // if we are in an unreachable segment then
                            // the stack might be empty at this point, and in general
                            // there is no point in inserting a copy instruction
                            // since it'll never be executed.
                            if instruction_reachable {
                                let provider = self.providers_stack.consume()?;
                                if provider != result {
                                    self.out.push(InternalOpcode::Copy);
                                    self.push_loc(provider);
                                    self.push_loc(result);
                                }
                            } else {
                                // There might not actually be anything at the top of the stack
                                // in the unreachable segment. But there might, in which case
                                // we must remove it to make sure that the `result` is at the top
                                // after the block ends.
                                // Note that the providers stack can never be shorter than
                                // `state.opds.stack.len()` at this point due to well-formedness of
                                // blocks.
                                if self.providers_stack.len() == state.opds.stack.len() {
                                    self.providers_stack.consume()?;
                                }
                            }
                            self.providers_stack.provide_existing(result);
                        }

                        // `as u32` would be safe here since module sizes are much less than
                        // 4GB, but we are being extra careful.
                        let current_pos: u32 = self.out.bytes.len().try_into()?;
                        for pos in backpatch_locations {
                            self.out.back_patch(pos, current_pos)?;
                        }
                    }
                }
            }
            OpCode::Block(ty) => {
                // If the block has a return value (in our version of Wasm that means a single
                // value only) then we need to reserve a location where this
                // value will be available after the block ends. The block end
                // can be reached in multiple ways, e.g., through jumps from multiple
                // locations, and it is crucial that all of those end up writing the
                // relevant value to the same location. This is the location we
                // reserve here.
                let result = if matches!(ty, BlockType::ValueType(_)) {
                    let r = self.providers_stack.dynamic_locations.get();
                    Some(Provider::Dynamic(r))
                } else {
                    None
                };
                self.backpatch.push(JumpTarget::new_unknown(result));
            }
            OpCode::Loop(_ty) => {
                // In contrast to a `Block` or `If`, the end of a `Loop` can only be
                // reached through direct execution; jumps cannot target it. Thus we
                // don't need to insert any copies or reserve any locations.
                self.backpatch.push(JumpTarget::new_known(self.out.current_offset()))
            }
            OpCode::If {
                ty,
            } => {
                self.out.push(If);
                self.push_consume()?;
                // Like for `Block`, we need to reserve a location that will have the resulting
                // value no matter how we end up at it.
                let result = if matches!(ty, BlockType::ValueType(_)) {
                    let r = self.providers_stack.dynamic_locations.get();
                    Some(Provider::Dynamic(r))
                } else {
                    None
                };
                self.backpatch.push(JumpTarget::new_unknown_loc(self.out.current_offset(), result));
                self.out.push_u32(0);
            }
            OpCode::Else => {
                // If we reached the else normally, after executing the if branch, we just break
                // to the end of else.
                self.push_br_jump(instruction_reachable, 0)?;
                // Because the module is well-formed this can only happen after an `if`.
                // We do not backpatch the code now, apart from the initial jump to the else
                // branch. The effect of this will be that any break out of the if statement
                // will jump to the end of else, as intended.
                if let JumpTarget::Unknown {
                    backpatch_locations,
                    result: _,
                } = self.backpatch.get_mut(0)?
                {
                    // `as u32` would be safe here since module sizes are much less than
                    // 4GB, but we are being extra careful.
                    let current_pos: u32 = self.out.bytes.len().try_into()?;
                    ensure!(
                        !backpatch_locations.is_empty(),
                        "Backpatch should contain at least the If start."
                    );
                    let first = backpatch_locations.remove(0);
                    self.out.back_patch(first, current_pos)?;
                } else {
                    bail!("Invariant violation in else branch.")
                }
            }
            OpCode::Br(label_idx) => {
                self.push_br_jump(instruction_reachable, *label_idx)?;
                // Everything after Br, until the end of the block, is unreachable.
                self.providers_stack.truncate(state.opds.stack.len())?;
            }
            OpCode::BrIf(label_idx) => {
                // We output first the target and then the conditional source. This is
                // maybe not ideal since the conditional will sometimes not be
                // taken, in which case we don't need to read that, but it's simpler.
                let condition_source = self.providers_stack.consume()?;
                self.push_br_if_jump(*label_idx)?;
                self.push_loc(condition_source);
            }
            OpCode::BrTable {
                labels,
                default,
            } => {
                // We split the BrTable into two instructions, `BrTable` and `BrTableCarry`.
                // The former applies where the target of the jump does not expect any value at
                // the top of the stack, whereas the latter one applies when a single value is
                // expected.
                //
                // Validation (see validate.rs case for BrTable) ensures that all targets of the
                // jump have the same label type, so we determine the label type
                // by only checking the label type of the default jump target.
                //
                // In the case of BrTableCarry we need to insert additional Copy instructions to
                // make sure that when execution resumes after the jump the value is available
                // in the correct register.
                let target_frame =
                    state.ctrls.get(*default).context("Could not get jump target frame.")?;
                if let BlockType::EmptyType = target_frame.label_type {
                    self.out.push(BrTable);
                    let _condition_source = self.push_consume()?;
                } else {
                    self.out.push(BrTableCarry);
                    let _condition_source = self.push_consume()?;
                    let _copy_source = self.push_consume()?;
                }

                // the try_into is not needed because MAX_SWITCH_SIZE is small enough,
                // but it does not hurt.
                let labels_len: u16 = labels.len().try_into()?;
                self.out.push_u16(labels_len);
                self.push_br_table_jump(*default)?;
                // The label types are the same for the default as well as all the other
                // labels.
                for label_idx in labels {
                    self.push_br_table_jump(*label_idx)?;
                }
                // Everything after BrTable, until the end of the block, is unreachable.
                self.providers_stack.truncate(state.opds.stack.len())?;
            }
1348            OpCode::Return => {
1349                // The interpreter will know that return means terminate execution of the
1350                // function and from the result type of the function it will be
1351                // clear whether anything needs to be returned.
1352                if self.return_type.is_some() {
1353                    let top = self.providers_stack.consume()?;
1354                    if top != RETURN_VALUE_LOCATION {
1355                        self.out.push(InternalOpcode::Copy);
1356                        self.push_loc(top);
1357                        self.push_loc(RETURN_VALUE_LOCATION);
1358                    }
1359                }
1360                self.out.push(Return);
1361                // Everything after Return, until the end of the block is unreachable.
1362                self.providers_stack.truncate(state.opds.stack.len())?;
1363            }
1364            &OpCode::Call(idx) => {
1365                self.out.push(Call);
1366                self.out.push_u32(idx);
1367                let f = ctx.get_func(idx)?;
1368                // The interpreter knows the number of arguments already. No need to record.
1369                // Note that arguments are from **last** to first.
1370                for _ in &f.parameters {
1371                    self.push_consume()?;
1372                }
1373                // Return value, if it exists. The interpreter knows the return type
1374                // already.
1375                if f.result.is_some() {
1376                    // To clarify any confusion, the return value will be available, by
1377                    // convention, in the RETURN_VALUE_LOCATION register of the callee. We
1378                    // then need to copy it into the appropriate register of
1379                    // the caller.
1380                    self.push_provide();
1381                }
1382            }
1383            OpCode::TickEnergy(cost) => {
1384                self.out.push(TickEnergy);
1385                self.out.push_u32(*cost);
1386            }
1387            &OpCode::CallIndirect(idx) => {
1388                self.out.push(CallIndirect);
1389                self.out.push_u32(idx);
1390                self.push_consume()?;
1391                let f = ctx.get_type(idx)?;
1392                // The interpreter knows the number of arguments already. No need to record.
1393                // Note that arguments are from **last** to first.
1394                for _ in &f.parameters {
1395                    // Same as in Call: pop the provider and record its location.
1396                    self.push_consume()?;
1397                }
1398                // The interpreter knows the return type already.
1399                if f.result.is_some() {
1400                    self.push_provide();
1401                }
1402            }
1403            OpCode::Nop => {
1404                // do nothing, we don't need an opcode for that since we don't
1405                // care about alignment.
1406            }
1407            OpCode::Unreachable => {
1408                self.out.push(Unreachable);
1409                // Everything after Unreachable, until the end of the block is unreachable.
1410                self.providers_stack.truncate(state.opds.stack.len())?;
1411            }
1412            OpCode::Drop => {
1413                self.providers_stack.consume()?;
1414            }
1415            OpCode::Select => {
1416                self.push_ternary(Select)?;
1417            }
1418            OpCode::LocalGet(idx) => {
1419                // the as i32 is safe because idx < NUM_ALLOWED_LOCALS <= 2^15
1420                self.providers_stack.provide_existing(Provider::Local((*idx) as i32));
1421                // No instruction
1422            }
1423            OpCode::LocalSet(idx) | OpCode::LocalTee(idx) => {
1424                // If the last instruction produced something, just remove the indirection
1425                // and write directly to the local.
1426                //
1427                // If the value of this particular local is somewhere on the providers_stack
1428                // we need to preserve that value:
1429                // - make a new dynamic location beyond all used locations (the "preserve" space)
1430                // - copy from the local to that location.
1431                let mut reserve = None;
1432                // In principle this loop is "non-linear" behaviour, since we iterate
1433                // over the entire length of the providers stack for each instruction.
1434                // The worst case is LocalTee, since it keeps the stack at the same
1435                // height. But note that the stack height is limited by
1436                // `MAX_ALLOWED_STACK_HEIGHT`, so this is not really quadratic
1437                // behaviour; it is still linear in the number of instructions,
1438                // only with a rather large constant.
1439                //
1440                // There is a benchmark with the module validation-time-preserve.wat that
1441                // measures validation of a module that is more than 1MB, with a stack
1442                // height of 1000 and mostly LocalTee instructions. It takes around 13ms to
1443                // validate and compile, which is well within the acceptable range. So for
1444                // the time being we do not add extra complexity to make the behaviour here
1445                // more efficient; that is an optimization that can be done without making
1446                // any breaking changes.
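                // For example, in the sequence `local.get 0; i32.const 1; local.set 0`
                // the provider of the first operand is Local(0). Before local 0 is
                // overwritten we allocate a fresh dynamic location, emit a Copy from
                // the local into it, and retarget the provider, so a later consumer of
                // that operand still observes the old value.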
1447                for provide_slot in self.providers_stack.stack.iter_mut() {
1448                    if let Provider::Local(l) = *provide_slot {
1449                        if Ok(*idx) == u32::try_from(l) {
1450                            let reserve = match reserve {
1451                                Some(r) => r,
1452                                None => {
1453                                    let result = Provider::Dynamic(
1454                                        self.providers_stack.dynamic_locations.get(),
1455                                    );
1456                                    reserve = Some(result);
1457                                    result
1458                                }
1459                            };
1460                            // When an operation actually reads this value it will read it from the
1461                            // reserve slot.
1462                            *provide_slot = reserve;
1463                        }
1464                    }
1465                }
1466                let idx = (*idx).try_into()?; // This should never overflow since we have a very low bound
1467                                              // on locals, but we are just playing it safe.
1468                if let Some(reserve) = reserve {
1469                    self.out.push(Copy);
1470                    self.out.push_i32(idx); // from
1471                    self.push_loc(reserve); // to
1472                }
1473                if matches!(opcode, OpCode::LocalSet(..)) {
1474                    match last_provide {
1475                        // if we had to copy to a reserve location then it is not
1476                        // possible to short circuit the instruction.
1477                        // We need to insert an additional copy instruction.
1478                        Some(back_loc) if reserve.is_none() => {
1479                            // instead of inserting LocalSet, just tell the previous
1480                            // instruction to copy the value directly into the local.
1481                            self.out.back_patch(back_loc, idx as u32)?;
1482
1483                            // And clear the provider from the stack
1484                            self.providers_stack.consume()?;
1485                        }
1486                        _ => {
1487                            self.out.push(Copy);
1488                            let _operand = self.push_consume()?; // value first.
1489                            self.out.push_i32(idx); // target second.
1490                        }
1491                    }
1492                } else {
1493                    match last_provide {
1494                        // if we had to copy to a reserve location then it is not
1495                        // possible to short circuit the instruction, since the extra Copy
1496                        // instruction sits in between. We insert an explicit copy
1497                        // instruction instead.
1498                        Some(back_loc) if reserve.is_none() => {
1499                            // instead of inserting LocalSet, just tell the previous
1500                            // instruction to copy the value directly into the local.
1501                            self.out.back_patch(back_loc, idx as u32)?;
1502
1503                            // And clear the provider from the stack
1504                            self.providers_stack.consume()?;
1505                            self.providers_stack.provide_existing(Provider::Local(idx));
1506                        }
1507                        _ => {
1508                            // Copy the value into the local, consuming its provider.
1509                            self.out.push(Copy);
1510                            let _source = self.push_consume()?; // value first.
1511                            self.out.push_i32(idx); // target second.
1512                            self.providers_stack.provide_existing(Provider::Local(idx));
1513                        }
1514                    }
1515                }
1516            }
1517            OpCode::GlobalGet(idx) => {
1518                // In principle globals could also be "providers" or locations to write.
1519                // We could have them in front of constants, in the negative space.
1520                // This is a bit complex for the same reason that LocalSet is complex.
1521                // We need to sometimes insert Copy instructions to preserve values that
1522                // are on the operand stack. We have prototyped this as well but it did
1523                // not lead to performance improvements on examples, so that case is not handled
1524                // here.
1525                self.out.push(GlobalGet);
1526                // the as u16 is safe because idx < MAX_NUM_GLOBALS <= 2^16
1527                self.out.push_u16(*idx as u16);
1528                self.push_provide();
1529            }
1530            OpCode::GlobalSet(idx) => {
1531                self.out.push(GlobalSet);
1532                // the as u16 is safe because idx < MAX_NUM_GLOBALS <= 2^16
1533                self.out.push_u16(*idx as u16);
1534                self.push_consume()?;
1535            }
1536            OpCode::I32Load(memarg) => {
1537                self.push_mem_load(I32Load, memarg)?;
1538            }
1539            OpCode::I64Load(memarg) => {
1540                self.push_mem_load(I64Load, memarg)?;
1541            }
1542            OpCode::I32Load8S(memarg) => {
1543                self.push_mem_load(I32Load8S, memarg)?;
1544            }
1545            OpCode::I32Load8U(memarg) => {
1546                self.push_mem_load(I32Load8U, memarg)?;
1547            }
1548            OpCode::I32Load16S(memarg) => {
1549                self.push_mem_load(I32Load16S, memarg)?;
1550            }
1551            OpCode::I32Load16U(memarg) => {
1552                self.push_mem_load(I32Load16U, memarg)?;
1553            }
1554            OpCode::I64Load8S(memarg) => {
1555                self.push_mem_load(I64Load8S, memarg)?;
1556            }
1557            OpCode::I64Load8U(memarg) => {
1558                self.push_mem_load(I64Load8U, memarg)?;
1559            }
1560            OpCode::I64Load16S(memarg) => {
1561                self.push_mem_load(I64Load16S, memarg)?;
1562            }
1563            OpCode::I64Load16U(memarg) => {
1564                self.push_mem_load(I64Load16U, memarg)?;
1565            }
1566            OpCode::I64Load32S(memarg) => {
1567                self.push_mem_load(I64Load32S, memarg)?;
1568            }
1569            OpCode::I64Load32U(memarg) => {
1570                self.push_mem_load(I64Load32U, memarg)?;
1571            }
1572            OpCode::I32Store(memarg) => {
1573                self.push_mem_store(I32Store, memarg)?;
1574            }
1575            OpCode::I64Store(memarg) => {
1576                self.push_mem_store(I64Store, memarg)?;
1577            }
1578            OpCode::I32Store8(memarg) => {
1579                self.push_mem_store(I32Store8, memarg)?;
1580            }
1581            OpCode::I32Store16(memarg) => {
1582                self.push_mem_store(I32Store16, memarg)?;
1583            }
1584            OpCode::I64Store8(memarg) => {
1585                self.push_mem_store(I64Store8, memarg)?;
1586            }
1587            OpCode::I64Store16(memarg) => {
1588                self.push_mem_store(I64Store16, memarg)?;
1589            }
1590            OpCode::I64Store32(memarg) => {
1591                self.push_mem_store(I64Store32, memarg)?;
1592            }
1593            OpCode::MemorySize => {
1594                self.out.push(MemorySize);
1595                self.push_provide();
1596            }
1597            OpCode::MemoryGrow => self.push_unary(MemoryGrow)?,
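            // Constants emit no opcode. A constant becomes a provider that consuming
            // instructions reference directly; `finish` lays all constants out in a
            // table indexed by their (negative) location.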
1598            &OpCode::I32Const(c) => {
1599                self.providers_stack.push_constant(c as i64)?;
1600            }
1601            &OpCode::I64Const(c) => {
1602                self.providers_stack.push_constant(c)?;
1603            }
1604            OpCode::I32Eqz => {
1605                self.push_unary(I32Eqz)?;
1606            }
1607            OpCode::I32Eq => {
1608                self.push_binary(I32Eq)?;
1609            }
1610            OpCode::I32Ne => {
1611                self.push_binary(I32Ne)?;
1612            }
1613            OpCode::I32LtS => {
1614                self.push_binary(I32LtS)?;
1615            }
1616            OpCode::I32LtU => {
1617                self.push_binary(I32LtU)?;
1618            }
1619            OpCode::I32GtS => {
1620                self.push_binary(I32GtS)?;
1621            }
1622            OpCode::I32GtU => {
1623                self.push_binary(I32GtU)?;
1624            }
1625            OpCode::I32LeS => {
1626                self.push_binary(I32LeS)?;
1627            }
1628            OpCode::I32LeU => {
1629                self.push_binary(I32LeU)?;
1630            }
1631            OpCode::I32GeS => {
1632                self.push_binary(I32GeS)?;
1633            }
1634            OpCode::I32GeU => {
1635                self.push_binary(I32GeU)?;
1636            }
1637            OpCode::I64Eqz => {
1638                self.push_unary(I64Eqz)?;
1639            }
1640            OpCode::I64Eq => {
1641                self.push_binary(I64Eq)?;
1642            }
1643            OpCode::I64Ne => {
1644                self.push_binary(I64Ne)?;
1645            }
1646            OpCode::I64LtS => {
1647                self.push_binary(I64LtS)?;
1648            }
1649            OpCode::I64LtU => {
1650                self.push_binary(I64LtU)?;
1651            }
1652            OpCode::I64GtS => {
1653                self.push_binary(I64GtS)?;
1654            }
1655            OpCode::I64GtU => {
1656                self.push_binary(I64GtU)?;
1657            }
1658            OpCode::I64LeS => {
1659                self.push_binary(I64LeS)?;
1660            }
1661            OpCode::I64LeU => {
1662                self.push_binary(I64LeU)?;
1663            }
1664            OpCode::I64GeS => {
1665                self.push_binary(I64GeS)?;
1666            }
1667            OpCode::I64GeU => {
1668                self.push_binary(I64GeU)?;
1669            }
1670            OpCode::I32Clz => {
1671                self.push_unary(I32Clz)?;
1672            }
1673            OpCode::I32Ctz => {
1674                self.push_unary(I32Ctz)?;
1675            }
1676            OpCode::I32Popcnt => {
1677                self.push_unary(I32Popcnt)?;
1678            }
1679            OpCode::I32Add => {
1680                self.push_binary(I32Add)?;
1681            }
1682            OpCode::I32Sub => {
1683                self.push_binary(I32Sub)?;
1684            }
1685            OpCode::I32Mul => {
1686                self.push_binary(I32Mul)?;
1687            }
1688            OpCode::I32DivS => {
1689                self.push_binary(I32DivS)?;
1690            }
1691            OpCode::I32DivU => {
1692                self.push_binary(I32DivU)?;
1693            }
1694            OpCode::I32RemS => {
1695                self.push_binary(I32RemS)?;
1696            }
1697            OpCode::I32RemU => {
1698                self.push_binary(I32RemU)?;
1699            }
1700            OpCode::I32And => {
1701                self.push_binary(I32And)?;
1702            }
1703            OpCode::I32Or => {
1704                self.push_binary(I32Or)?;
1705            }
1706            OpCode::I32Xor => {
1707                self.push_binary(I32Xor)?;
1708            }
1709            OpCode::I32Shl => {
1710                self.push_binary(I32Shl)?;
1711            }
1712            OpCode::I32ShrS => {
1713                self.push_binary(I32ShrS)?;
1714            }
1715            OpCode::I32ShrU => {
1716                self.push_binary(I32ShrU)?;
1717            }
1718            OpCode::I32Rotl => {
1719                self.push_binary(I32Rotl)?;
1720            }
1721            OpCode::I32Rotr => {
1722                self.push_binary(I32Rotr)?;
1723            }
1724            OpCode::I64Clz => {
1725                self.push_unary(I64Clz)?;
1726            }
1727            OpCode::I64Ctz => {
1728                self.push_unary(I64Ctz)?;
1729            }
1730            OpCode::I64Popcnt => {
1731                self.push_unary(I64Popcnt)?;
1732            }
1733            OpCode::I64Add => {
1734                self.push_binary(I64Add)?;
1735            }
1736            OpCode::I64Sub => {
1737                self.push_binary(I64Sub)?;
1738            }
1739            OpCode::I64Mul => {
1740                self.push_binary(I64Mul)?;
1741            }
1742            OpCode::I64DivS => {
1743                self.push_binary(I64DivS)?;
1744            }
1745            OpCode::I64DivU => {
1746                self.push_binary(I64DivU)?;
1747            }
1748            OpCode::I64RemS => {
1749                self.push_binary(I64RemS)?;
1750            }
1751            OpCode::I64RemU => {
1752                self.push_binary(I64RemU)?;
1753            }
1754            OpCode::I64And => {
1755                self.push_binary(I64And)?;
1756            }
1757            OpCode::I64Or => {
1758                self.push_binary(I64Or)?;
1759            }
1760            OpCode::I64Xor => {
1761                self.push_binary(I64Xor)?;
1762            }
1763            OpCode::I64Shl => {
1764                self.push_binary(I64Shl)?;
1765            }
1766            OpCode::I64ShrS => {
1767                self.push_binary(I64ShrS)?;
1768            }
1769            OpCode::I64ShrU => {
1770                self.push_binary(I64ShrU)?;
1771            }
1772            OpCode::I64Rotl => {
1773                self.push_binary(I64Rotl)?;
1774            }
1775            OpCode::I64Rotr => {
1776                self.push_binary(I64Rotr)?;
1777            }
1778            OpCode::I32WrapI64 => {
1779                self.push_unary(I32WrapI64)?;
1780            }
1781            OpCode::I64ExtendI32S => {
1782                self.push_unary(I64ExtendI32S)?;
1783            }
1784            OpCode::I64ExtendI32U => {
1785                self.push_unary(I64ExtendI32U)?;
1786            }
1787            OpCode::I32Extend8S => {
1788                self.push_unary(I32Extend8S)?;
1789            }
1790            OpCode::I32Extend16S => {
1791                self.push_unary(I32Extend16S)?;
1792            }
1793            OpCode::I64Extend8S => {
1794                self.push_unary(I64Extend8S)?;
1795            }
1796            OpCode::I64Extend16S => {
1797                self.push_unary(I64Extend16S)?;
1798            }
1799            OpCode::I64Extend32S => {
1800                self.push_unary(I64Extend32S)?;
1801            }
1802        }
1803        // This opcode handler maintains the invariant that the providers stack is the
1804        // same size as the operand stack.
1805        assert_eq!(self.providers_stack.stack.len(), state.opds.stack.len(), "{opcode:?}");
1806        Ok(())
1807    }
1808
1809    fn finish(self, _state: &ValidationState) -> CompileResult<Self::Outcome> {
1810        ensure!(self.backpatch.stack.is_empty(), "There are still jumps to backpatch.");
1811        let mut constants = vec![0; self.providers_stack.constants.len()];
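        // Constant providers live in the negative location space; the
        // `-(place + 1)` below maps location -1 to index 0, -2 to index 1, and
        // so on, which is why the locations must be consecutive.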
1812        for (value, place) in self.providers_stack.constants {
1813            *constants
1814                .get_mut(usize::try_from(-(place + 1))?)
1815                .context("Invariant violation. All locations are meant to be consecutive.")? =
1816                value;
1817        }
1818        Ok((self.out, self.providers_stack.dynamic_locations.next_location, constants))
1819    }
1820}
1821
1822struct ModuleContext<'a> {
1823    module: &'a Module,
1824    locals: &'a [LocalsRange],
1825    code:   &'a Code,
1826}
1827
1828impl<'a> HasValidationContext for ModuleContext<'a> {
1829    fn get_local(&self, idx: u32) -> CompileResult<ValueType> {
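        // Locals occupy consecutive, non-overlapping [start, end) ranges, so a
        // binary search locates the range containing `idx`. For example,
        // parameters (i32 i32) followed by (local i64) give the ranges
        // [0, 1), [1, 2) and [2, 3).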
1830        let res = self.locals.binary_search_by(|locals| {
1831            if locals.end <= idx {
1832                std::cmp::Ordering::Less
1833            } else if idx < locals.start {
1834                std::cmp::Ordering::Greater
1835            } else {
1836                std::cmp::Ordering::Equal
1837            }
1838        });
1839        match res {
1840            Ok(idx) => Ok(self.locals[idx].ty),
1841            Err(_) => bail!("Local index out of range."),
1842        }
1843    }
1844
1845    fn get_global(&self, idx: crate::types::GlobalIndex) -> CompileResult<(ValueType, bool)> {
1846        match self.module.global.globals.get(idx as usize) {
1847            Some(g) => Ok((ValueType::from(g), g.mutable)),
1848            None => bail!("Attempting to access non-existing global."),
1849        }
1850    }
1851
1852    fn memory_exists(&self) -> bool { self.module.memory.memory_type.is_some() }
1853
1854    fn table_exists(&self) -> bool { self.module.table.table_type.is_some() }
1855
1856    fn get_func(&self, idx: FuncIndex) -> CompileResult<&std::rc::Rc<FunctionType>> {
1857        if (idx as usize) < self.module.import.imports.len() {
1858            match self.module.import.imports[idx as usize].description {
1859                ImportDescription::Func {
1860                    type_idx,
1861                } => self
1862                    .module
1863                    .ty
1864                    .get(type_idx)
1865                    .ok_or_else(|| anyhow!("Attempting to get type that does not exist")),
1866            }
1867        } else {
1868            self.module
1869                .code
1870                .impls
1871                .get(idx as usize - self.module.import.imports.len())
1872                .map(|c| &c.ty)
1873                .ok_or_else(|| anyhow!("Attempting to get type of function that does not exist."))
1874        }
1875    }
1876
1877    fn get_type(&self, idx: TypeIndex) -> CompileResult<&std::rc::Rc<FunctionType>> {
1878        self.module
1879            .ty
1880            .types
1881            .get(idx as usize)
1882            .ok_or_else(|| anyhow!("Attempting to get non-existing type."))
1883    }
1884
1885    fn return_type(&self) -> BlockType { BlockType::from(self.code.ty.result) }
1886}
1887
1888impl Module {
1889    /// Compile a module into an artifact, failing if there are problems.
1890    /// Problems should not arise if the module is well-formed and all the
1891    /// imports are supported by the `I` type.
1892    pub fn compile<I: TryFromImport>(self) -> CompileResult<Artifact<I, CompiledFunction>> {
1893        let mut code_out = Vec::with_capacity(self.code.impls.len());
1894
1895        for code in self.code.impls.iter() {
1896            let mut ranges = Vec::with_capacity(code.ty.parameters.len() + code.locals.len());
1897            let mut locals = Vec::with_capacity(code.ty.parameters.len() + code.locals.len());
1898            let mut start = 0;
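            // Parameters and declared locals share one index space: each parameter
            // takes one index, and each group of locals is expanded by its
            // multiplicity. E.g. (param i32) (local i64 i64) produces the ranges
            // [0, 1) and [1, 3).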
1899            for &param in code.ty.parameters.iter() {
1900                let end = start + 1;
1901                ranges.push(LocalsRange {
1902                    start,
1903                    end,
1904                    ty: param,
1905                });
1906                start = end;
1907            }
1908            for &local in code.locals.iter() {
1909                locals.push(ArtifactLocal::try_from(local)?);
1910                let end = start + local.multiplicity;
1911                ranges.push(LocalsRange {
1912                    start,
1913                    end,
1914                    ty: local.ty,
1915                });
1916                start = end;
1917            }
1918
1919            let context = ModuleContext {
1920                module: &self,
1921                locals: &ranges,
1922                code,
1923            };
1924            let (mut exec_code, num_registers, constants) = validate(
1925                &context,
1926                code.expr.instrs.iter().map(Result::Ok),
1927                BackPatch::new(start, code.ty.result),
1928            )?;
1929            // We add a return instruction at the end so we have an easier time in the
1930            // interpreter, since there is no implicit return.
1931
1932            // No need to insert an additional Copy here. The `End` block will insert it if
1933            // needed.
1934            exec_code.push(InternalOpcode::Return);
1935
1936            let num_params: u32 = code.ty.parameters.len().try_into()?;
1937
1938            let result = CompiledFunction {
1939                type_idx: code.ty_idx,
1940                params: code.ty.parameters.clone(),
1941                num_locals: start - num_params,
1942                locals,
1943                return_type: BlockType::from(code.ty.result),
1944                num_registers: num_registers.try_into()?,
1945                constants,
1946                code: exec_code,
1947            };
1948            code_out.push(result)
1949        }
1950
1951        let ty = self.ty.types.into_iter().map(|x| (*x).clone()).collect::<Vec<FunctionType>>();
1952        let table = {
1953            if let Some(tt) = self.table.table_type {
1954                let mut functions = vec![None; tt.limits.min as usize];
1955                for init in self.element.elements.iter() {
1956                    // validation has already ensured that inits are within bounds.
1957                    for (place, value) in
1958                        functions[init.offset as usize..].iter_mut().zip(init.inits.iter())
1959                    {
1960                        *place = Some(*value)
1961                    }
1962                }
1963                InstantiatedTable {
1964                    functions,
1965                }
1966            } else {
1967                InstantiatedTable {
1968                    functions: Vec::new(),
1969                }
1970            }
1971        };
1972        let memory = {
1973            if let Some(mt) = self.memory.memory_type {
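                // The effective maximum is clamped to MAX_NUM_PAGES, both when a
                // maximum is declared and when it is absent.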
1974                Some(ArtifactMemory {
1975                    init_size: mt.limits.min,
1976                    max_size:  mt
1977                        .limits
1978                        .max
1979                        .map(|x| std::cmp::min(x, MAX_NUM_PAGES))
1980                        .unwrap_or(MAX_NUM_PAGES),
1981                    init:      self
1982                        .data
1983                        .sections
1984                        .into_iter()
1985                        .map(ArtifactData::from)
1986                        .collect::<Vec<_>>(),
1987                })
1988            } else {
1989                None
1990            }
1991        };
1992        let global = InstantiatedGlobals {
1993            inits: self.global.globals.iter().map(|x| x.init).collect::<Vec<_>>(),
1994        };
1995        let export = self
1996            .export
1997            .exports
1998            .into_iter()
1999            .filter_map(|export| {
2000                if let ExportDescription::Func {
2001                    index,
2002                } = export.description
2003                {
2004                    Some((export.name, index))
2005                } else {
2006                    None
2007                }
2008            })
2009            .collect::<BTreeMap<_, _>>();
2010        let imports = self
2011            .import
2012            .imports
2013            .into_iter()
2014            .map(|i| I::try_from_import(&ty, i))
2015            .collect::<CompileResult<_>>()?;
2016        Ok(Artifact {
2017            version: ArtifactVersion::V1,
2018            imports,
2019            ty,
2020            table,
2021            memory,
2022            global,
2023            export,
2024            code: code_out,
2025        })
2026    }
2027}