concordium_wasm/
types.rs

1//! AST definition of Wasm modules, as well as supporting datatypes.
2//! Based on the [W3C Wasm specification](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-import)
3//!
4//! The main type is [Module](./struct.Module.html), which defines a Wasm
5//! module, either validated or not validated. Some of the properties that are
6//! stated as validity conditions in the Wasm specification are already
7//! guaranteed automatically by the AST definition of the Module, and the
8//! parsing functions.
9
10use anyhow::bail;
11use derive_more::{Display, From};
12use std::{convert::TryFrom, rc::Rc};
13
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Clone, Display)]
/// A WebAssembly name. We choose to have it be an owned value rather than a
/// reference into the original module. Names are also used in the parsed AST,
/// and we don't want to retain references to the original bytes just because of
/// a few names.
///
/// The `Display` implementation prints the contained string unchanged.
#[display(fmt = "{}", name)]
pub struct Name {
    /// The name itself. Names in Wasm are utf8 encoded, which matches the
    /// encoding of a Rust `String`.
    pub name: String,
}
24
25impl AsRef<str> for Name {
26    fn as_ref(&self) -> &str { &self.name }
27}
28
29impl<'a> From<&'a str> for Name {
30    fn from(s: &'a str) -> Self {
31        Self {
32            name: s.to_string(),
33        }
34    }
35}
36
37impl std::borrow::Borrow<str> for Name {
38    fn borrow(&self) -> &str { &self.name }
39}
40
#[derive(Debug)]
/// A single import description, i.e., what kind of item is imported.
pub enum ImportDescription {
    /// Import a function with the given type. The other import types, Table,
    /// Memory, Global, are not supported by Concordium.
    Func {
        /// Index of the type of the imported function.
        type_idx: TypeIndex,
    },
}
50
#[derive(Debug)]
/// Import of an item from another module. An import is identified by the
/// pair of the module name and the item name.
pub struct Import {
    /// The name of the module the item is imported from.
    pub mod_name:    Name,
    /// The name of the item that is to be imported.
    pub item_name:   Name,
    /// The description of the imported item, i.e., its kind and type.
    pub description: ImportDescription,
}
61
62impl Import {
63    /// Return whether the import is a function.
64    pub fn is_func(&self) -> bool {
65        match self.description {
66            ImportDescription::Func {
67                ..
68            } => true,
69        }
70    }
71}
72
#[derive(Debug, Default)]
/// The import section. This lists imports in the module.
///
/// The Default instance for this type produces an empty section.
pub struct ImportSection {
    /// The list of imports of the module.
    pub imports: Vec<Import>,
}
80
#[derive(Debug, Default)]
/// The function section. This contains types of declared functions.
///
/// The Default instance for this type produces an empty function section.
pub struct FunctionSection {
    /// Type indices of the declared functions, one entry per function.
    pub types: Vec<TypeIndex>,
}
88
89impl FunctionSection {
90    pub fn get(&self, idx: FuncIndex) -> Option<TypeIndex> { self.types.get(idx as usize).copied() }
91}
92
#[derive(Debug, Default)]
/// The table section. The table is a list of functions and is used to support
/// indirect function calls.
///
/// The Default instance for this type produces an empty table section.
pub struct TableSection {
    /// We only support at most one table for now, as in Wasm MVP, hence an
    /// Option as opposed to the vector.
    ///
    /// If present, the table type limits ensure that min <= 2^16.
    pub table_type: Option<TableType>,
}
105
#[derive(Debug)]
/// An exported item description. Since it is inconsequential whether extra
/// definitions are exported we allow all of them to be flexible with respect to
/// the external tooling.
pub enum ExportDescription {
    /// An exported function with the given index.
    Func {
        /// Index of the exported function.
        index: FuncIndex,
    },
    /// An exported table. Since only table with index 0 is currently supported
    /// there is no explicit index.
    Table,
    /// Exported memory. Since only memory with index 0 is currently supported
    /// there is no explicit index.
    Memory,
    /// An exported global.
    Global {
        /// Index of the exported global.
        index: GlobalIndex,
    },
}
126
#[derive(Debug)]
/// An exported item.
pub struct Export {
    /// Name of the exported item.
    pub name:        Name,
    /// The kind of the exported item (function, table, memory, or global),
    /// together with its index where one is needed.
    pub description: ExportDescription,
}
135
#[derive(Debug, Default)]
/// The export section, which lists exported functions, globals, etc.
///
/// The Default instance of this type returns an empty section.
pub struct ExportSection {
    /// The list of exported items.
    pub exports: Vec<Export>,
}
143
#[derive(Debug, Default)]
/// We do not support start sections, so this type has exactly one value, the
/// empty start section, which is of course also the value returned by the
/// Default instance. The type has no fields, so it carries no data.
pub struct StartSection {}
149
#[derive(Debug)]
/// An element description, describing how to initialize the table.
/// The table index 0 is implicit, so we don't record it in the struct.
pub struct Element {
    /// The offset in the table at which to start the initialization.
    pub offset: i32,
    /// Functions to define in the table, starting at the offset.
    pub inits:  Vec<FuncIndex>,
}
159
#[derive(Debug, Default)]
/// The element section, which is used to initialize the function table.
///
/// The Default instance of this produces an empty Element section.
pub struct ElementSection {
    /// The list of element descriptions.
    pub elements: Vec<Element>,
}
167
#[derive(Debug, Copy, Clone)]
/// The initial global value with its type.
/// Because we do not allow imported globals, the initialization expression
/// must consist of a single constant value of the right type, which we
/// short-circuit into the single constant.
pub enum GlobalInit {
    /// A 32-bit integer constant; the global has type `I32`.
    I32(i32),
    /// A 64-bit integer constant; the global has type `I64`.
    I64(i64),
}
177
178impl From<GlobalInit> for i64 {
179    fn from(g: GlobalInit) -> Self {
180        match g {
181            GlobalInit::I64(x) => x,
182            GlobalInit::I32(x) => i64::from(x),
183        }
184    }
185}
186
187impl From<&Global> for ValueType {
188    fn from(g: &Global) -> Self {
189        match g.init {
190            GlobalInit::I32(_) => ValueType::I32,
191            GlobalInit::I64(_) => ValueType::I64,
192        }
193    }
194}
195
196impl GlobalInit {
197    ///  Type of this global
198    pub fn ty(self) -> ValueType {
199        match self {
200            GlobalInit::I32(_) => ValueType::I32,
201            GlobalInit::I64(_) => ValueType::I64,
202        }
203    }
204}
205
#[derive(Debug)]
/// A single Global declaration, with initial value.
pub struct Global {
    /// The type of the value with the initial value.
    pub init:    GlobalInit,
    /// Whether the global is declared as mutable.
    pub mutable: bool,
}
213
#[derive(Debug, Default)]
/// The list of globals declared in the module.
///
/// The Default instance of this type returns an empty section.
pub struct GlobalSection {
    /// The declared globals. The position in this list is the index used to
    /// look a global up.
    pub globals: Vec<Global>,
}
221
222impl GlobalSection {
223    pub fn get(&self, idx: GlobalIndex) -> Option<&Global> { self.globals.get(idx as usize) }
224}
225
#[derive(Debug, Clone, Copy)]
/// A local variable declaration in a function. A single declaration can
/// declare several locals of the same type at once.
pub struct Local {
    /// The number of variables of this type.
    pub multiplicity: u32,
    /// The type of the local.
    pub ty:           ValueType,
}
234
#[derive(Debug)]
/// The body of a function.
pub struct Code {
    /// Type of the function. This is added here to avoid more error cases
    /// in processing (e.g., after validation we know that the number of code
    /// and function sections match).
    pub ty:         Rc<FunctionType>,
    /// Type index carried over from the source. This should match the ty type
    /// above.
    pub ty_idx:     TypeIndex,
    /// The number of locals of a function. NB: This includes parameters and
    /// locals declared inside the function.
    pub num_locals: u32,
    /// Declaration of the locals. This does not include parameters.
    pub locals:     Vec<Local>,
    /// And a sequence of instructions.
    pub expr:       Expression,
}
253
#[derive(Debug, Default)]
/// The code section, which contains the code of functions declared in the
/// module.
///
/// The Default instance of this type returns an empty code section.
pub struct CodeSection {
    /// The function bodies, one entry per function.
    pub impls: Vec<Code>,
}
262
#[derive(Debug)]
/// The initialization of memory. The memory index is implicitly 0.
pub struct Data {
    /// Where in the memory to start initializing.
    pub offset: i32,
    /// The bytes to initialize with.
    pub init:   Vec<u8>,
}
271
#[derive(Debug, Default)]
/// The data section, this is used to initialize linear memory upon start of
/// execution.
///
/// The Default instance of this type returns an empty data section.
pub struct DataSection {
    /// The list of memory initializers.
    pub sections: Vec<Data>,
}
280
#[derive(Debug, Default)]
/// The memory section, which contains declarations of linear memories. The Wasm
/// version we support only supports at most one memory section.
///
/// The Default instance for this type produces an empty memory section.
pub struct MemorySection {
    /// Since we only support the memory with index 0 we use an Option as
    /// opposed to a vector. `None` means that no memory is declared.
    pub memory_type: Option<MemoryType>,
}
291
#[derive(Debug)]
/// A processed custom section. By specification all custom sections have a
/// name, followed by uninterpreted bytes.
pub struct CustomSection<'a> {
    /// The name of the custom section.
    pub name:     Name,
    /// The uninterpreted bytes of the section. These are borrowed rather
    /// than copied; presumably they point into the original module bytes.
    pub contents: &'a [u8],
}
299
#[derive(Debug, Default)]
/// The type section, which contains type definitions.
///
/// The default instance for this type produces an empty type section.
pub struct TypeSection {
    /// A list of types. We use an Rc here so that we can avoid cloning the
    /// FunctionType, which could be used to use-up resources when we
    /// add this type to each of the code sections.
    pub types: Vec<Rc<FunctionType>>,
}
310
311impl TypeSection {
312    pub fn get(&self, idx: TypeIndex) -> Option<&Rc<FunctionType>> { self.types.get(idx as usize) }
313}
314
#[derive(Debug)]
/// A parsed Wasm module. This no longer has custom sections since they are not
/// needed for further processing.
pub struct Module {
    /// The type section, with function type definitions.
    pub ty:      TypeSection,
    /// The import section.
    pub import:  ImportSection,
    /// The function section, with types of declared functions.
    pub func:    FunctionSection,
    /// The table section, with at most one table.
    pub table:   TableSection,
    /// The memory section, with at most one memory.
    pub memory:  MemorySection,
    /// The global section, with declared globals.
    pub global:  GlobalSection,
    /// The export section.
    pub export:  ExportSection,
    /// The start section, which is always empty.
    pub start:   StartSection,
    /// The element section, used to initialize the table.
    pub element: ElementSection,
    /// The code section, with function bodies.
    pub code:    CodeSection,
    /// The data section, used to initialize the memory.
    pub data:    DataSection,
}
331
/// A size of the stack.
/// NOTE(review): the unit is not stated in this file — confirm at the use
/// sites.
pub type StackSize = u64;
/// A number of operands on the stack.
pub type StackHeight = u64;
335
// Indices. All of them are plain `u32` aliases, so the compiler does not
// distinguish between the different kinds of indices.

/// Index of a function type, see [TypeSection::get].
pub type TypeIndex = u32;
/// Index of a function.
pub type FuncIndex = u32;
/// Index of a table. Only the table with index 0 is currently supported.
pub type TableIndex = u32;
/// Index of a memory. Only the memory with index 0 is currently supported.
pub type MemIndex = u32;
/// Index of a global, see [GlobalSection::get].
pub type GlobalIndex = u32;
/// Index of a local variable, used by the `Local*` instructions.
pub type LocalIndex = u32;
/// Index of a label, used by the branch instructions.
pub type LabelIndex = u32;
344
/// Supported Wasm value types (i.e., no floats). We use a very low-level
/// encoding which we make use of to remove some needless allocations. In
/// particular the tags must be as specified by the Wasm specification and must
/// match the binary serialization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueType {
    /// 32-bit integer, serialized as the byte `0x7F`.
    I32 = 0x7F,
    /// 64-bit integer, serialized as the byte `0x7E`.
    I64 = 0x7E,
}
355
356/// Try to decode a value type from a single byte, the bytes being as specified
357/// by the Wasm specification.
358impl TryFrom<u8> for ValueType {
359    type Error = anyhow::Error;
360
361    fn try_from(value: u8) -> Result<Self, Self::Error> {
362        match value {
363            0x7F => Ok(ValueType::I32),
364            0x7E => Ok(ValueType::I64),
365            _ => bail!("Unknown value type byte {:#04x}", value),
366        }
367    }
368}
369
370/// Try to decode a value type from a single byte, the bytes being as specified
371/// by the Wasm specification.
372impl From<ValueType> for u8 {
373    fn from(from: ValueType) -> Self {
374        match from {
375            ValueType::I32 => 0x7F,
376            ValueType::I64 => 0x7E,
377        }
378    }
379}
380
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
/// We only support the empty block type and a single value type. Type indices
/// are not supported in the MVP version of Wasm.
pub enum BlockType {
    /// The empty block type, i.e., no value type.
    EmptyType,
    /// A block type consisting of a single value type.
    ValueType(ValueType),
}
388
389impl From<Option<ValueType>> for BlockType {
390    fn from(opt: Option<ValueType>) -> Self {
391        match opt {
392            Some(x) => BlockType::ValueType(x),
393            None => BlockType::EmptyType,
394        }
395    }
396}
397
398impl BlockType {
399    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
400    pub fn is_empty(self) -> bool {
401        match self {
402            BlockType::EmptyType => true,
403            BlockType::ValueType(_) => false,
404        }
405    }
406}
407
#[derive(Debug, Copy, Clone)]
/// The type of a table, which consists only of the limits on its size.
pub struct TableType {
    /// Limits on the size of the table.
    pub limits: Limits,
}
412
#[derive(Debug, Clone, Copy)]
/// The type of a linear memory, which consists only of the limits on its
/// size.
pub struct MemoryType {
    /// Limits on the size of the memory.
    /// NOTE(review): presumably counted in Wasm pages — confirm.
    pub limits: Limits,
}
417
#[derive(Debug, Clone, Eq, PartialEq)]
/// The immediate memory argument. Since all operations are on memory index 0
/// the index is implicit.
pub struct MemArg {
    /// The offset into the linear memory.
    pub offset: u32,
    /// Alignment. This is ignored by the Wasm semantics, but may be used as a
    /// hint. We will simply ignore it.
    pub align:  u32,
}
428
#[derive(Debug, Copy, Clone)]
/// Size limits, used by both table and memory types.
pub struct Limits {
    /// The lower limit.
    pub min: u32,
    /// The upper limit, if one is given.
    pub max: Option<u32>,
}
434
#[derive(Debug, Clone, PartialEq, Eq)]
/// A function type with at most one return value. The MVP version of Wasm does
/// not support multiple return values, and thus we don't either.
pub struct FunctionType {
    /// Types of the parameters, in order.
    pub parameters: Vec<ValueType>,
    /// Type of the return value, or `None` if the function returns nothing.
    pub result:     Option<ValueType>,
}
442
443impl FunctionType {
444    /// A function type with no arguments and no results.
445    pub fn empty() -> Self {
446        Self {
447            parameters: Vec::new(),
448            result:     None,
449        }
450    }
451}
452
/// A sequence of instructions, in the order in which they appear.
pub type InstrSeq = Vec<OpCode>;
455
/// An expression is a sequence of instructions followed by the "end" delimiter,
/// which is also present in the binary format (see 5.4.6).
///
/// The Default instance of this type has an empty instruction sequence.
#[derive(Debug, Default, From)]
pub struct Expression {
    /// The instructions of the expression.
    pub instrs: InstrSeq,
}
462
#[derive(Clone, Debug, Eq, PartialEq)]
/// See [Wasm spec](https://webassembly.github.io/spec/core/syntax/instructions.html) for details of the meaning of the instructions.
///
/// Only integer instructions are present, matching the supported value
/// types. In addition to the instructions of the Wasm specification, this
/// type contains the extra `TickEnergy` instruction described at the end.
pub enum OpCode {
    // Control instructions
    End,
    Nop,
    Unreachable,
    Block(BlockType),
    Loop(BlockType),
    If {
        /// The block type of the conditional.
        ty: BlockType,
    },
    Else,
    Br(LabelIndex),
    BrIf(LabelIndex),
    BrTable {
        /// The labels of the branch table.
        labels:  Vec<LabelIndex>,
        /// The default label of the branch table.
        default: LabelIndex,
    },
    Return,
    Call(FuncIndex),
    CallIndirect(TypeIndex),

    // Parametric instructions
    Drop,
    Select,

    // Variable instructions
    LocalGet(LocalIndex),
    LocalSet(LocalIndex),
    LocalTee(LocalIndex),
    GlobalGet(GlobalIndex),
    GlobalSet(GlobalIndex),

    // Memory instructions. The memory index is implicitly 0, so only the
    // immediate memory argument is recorded.
    I32Load(MemArg),
    I64Load(MemArg),
    I32Load8S(MemArg),
    I32Load8U(MemArg),
    I32Load16S(MemArg),
    I32Load16U(MemArg),
    I64Load8S(MemArg),
    I64Load8U(MemArg),
    I64Load16S(MemArg),
    I64Load16U(MemArg),
    I64Load32S(MemArg),
    I64Load32U(MemArg),
    I32Store(MemArg),
    I64Store(MemArg),
    I32Store8(MemArg),
    I32Store16(MemArg),
    I64Store8(MemArg),
    I64Store16(MemArg),
    I64Store32(MemArg),
    MemorySize,
    MemoryGrow,

    // Numeric instructions
    // Constants.
    I32Const(i32),
    I64Const(i64),

    // Tests and comparisons.
    I32Eqz,
    I32Eq,
    I32Ne,
    I32LtS,
    I32LtU,
    I32GtS,
    I32GtU,
    I32LeS,
    I32LeU,
    I32GeS,
    I32GeU,
    I64Eqz,
    I64Eq,
    I64Ne,
    I64LtS,
    I64LtU,
    I64GtS,
    I64GtU,
    I64LeS,
    I64LeU,
    I64GeS,
    I64GeU,

    // Unary and binary arithmetic and bitwise operations.
    I32Clz,
    I32Ctz,
    I32Popcnt,
    I32Add,
    I32Sub,
    I32Mul,
    I32DivS,
    I32DivU,
    I32RemS,
    I32RemU,
    I32And,
    I32Or,
    I32Xor,
    I32Shl,
    I32ShrS,
    I32ShrU,
    I32Rotl,
    I32Rotr,
    I64Clz,
    I64Ctz,
    I64Popcnt,
    I64Add,
    I64Sub,
    I64Mul,
    I64DivS,
    I64DivU,
    I64RemS,
    I64RemU,
    I64And,
    I64Or,
    I64Xor,
    I64Shl,
    I64ShrS,
    I64ShrU,
    I64Rotl,
    I64Rotr,

    // Conversions between the two integer types.
    I32WrapI64,
    I64ExtendI32S,
    I64ExtendI32U,

    // Sign extension instructions. Optionally supported
    // depending on the protocol version.
    I32Extend8S,
    I32Extend16S,
    I64Extend8S,
    I64Extend16S,
    I64Extend32S,

    // Extra instructions that are not part of the Wasm spec.
    // They are part of the OpCode data type regardless to simplify
    // program transformations, chiefly the metering transformation.
    // Ideally we'd have a type parameter to OpCode, or a different data type,
    // but that creates more overhead than what is worth with the existing codebase.
    // So we add it here to the type, and make sure to not produce it when parsing
    // and disallow it during initial module validation.
    TickEnergy(u32),
}