concordium_wasm/types.rs
1//! AST definition of Wasm modules, as well as supporting datatypes.
2//! Based on the [W3C Wasm specification](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-import)
3//!
4//! The main type is [Module](./struct.Module.html), which defines a Wasm
5//! module, either validated or not validated. Some of the properties that are
6//! stated as validity conditions in the Wasm specification are already
7//! guaranteed automatically by the AST definition of the Module, and the
8//! parsing functions.
9
10use anyhow::bail;
11use derive_more::{Display, From};
12use std::{convert::TryFrom, rc::Rc};
13
14#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Clone, Display)]
15/// A webassembly Name. We choose to have it be an owned value rather than a
16/// reference into the original module. Names are also used in the parsed AST,
17/// and we don't want to retain references to the original bytes just because of
18/// a few names.
19#[display(fmt = "{}", name)]
20pub struct Name {
21 /// Names in Wasm are utf8 encoded.
22 pub name: String,
23}
24
25impl AsRef<str> for Name {
26 fn as_ref(&self) -> &str { &self.name }
27}
28
29impl<'a> From<&'a str> for Name {
30 fn from(s: &'a str) -> Self {
31 Self {
32 name: s.to_string(),
33 }
34 }
35}
36
37impl std::borrow::Borrow<str> for Name {
38 fn borrow(&self) -> &str { &self.name }
39}
40
41#[derive(Debug)]
42/// A single import description.
43pub enum ImportDescription {
44 /// Import a function with the given type. The other import types, Table,
45 /// Memory, Global, are not supported by Concordium.
46 Func {
47 type_idx: TypeIndex,
48 },
49}
50
51#[derive(Debug)]
52/// Import of an item from another module.
53pub struct Import {
54 /// The name of the module the item is imported from.
55 pub mod_name: Name,
56 /// The name of the item that is to be imported.
57 pub item_name: Name,
58 /// And the description of the item.
59 pub description: ImportDescription,
60}
61
62impl Import {
63 /// Return whether the import is a function.
64 pub fn is_func(&self) -> bool {
65 match self.description {
66 ImportDescription::Func {
67 ..
68 } => true,
69 }
70 }
71}
72
73#[derive(Debug, Default)]
74/// The import section. This lists imports in the module.
75///
76/// The Default instance for this type produces an empty section.
77pub struct ImportSection {
78 pub imports: Vec<Import>,
79}
80
81#[derive(Debug, Default)]
82/// The function section. This contains types of declared functions.
83///
84/// The Default instance for this type produces an empty function section.
85pub struct FunctionSection {
86 pub types: Vec<TypeIndex>,
87}
88
89impl FunctionSection {
90 pub fn get(&self, idx: FuncIndex) -> Option<TypeIndex> { self.types.get(idx as usize).copied() }
91}
92
93#[derive(Debug, Default)]
94/// The table section. The table is a list of functions and is used to support
95/// indirect function calls.
96///
97/// The Default instance for this type produces an empty table section.
98pub struct TableSection {
99 /// We only support at most one section for now, as in Wasm MVP, hence an
100 /// Option as opposed to the vector.
101 ///
102 /// If present, the table type limits ensure that min <= 2^16.
103 pub table_type: Option<TableType>,
104}
105
106#[derive(Debug)]
107/// An exported item description. Since it is inconsequential whether extra
108/// definitions are exported we allow all of them to be flexible with respect to
109/// the external tooling.
110pub enum ExportDescription {
111 /// An exported function with the given type.
112 Func {
113 index: FuncIndex,
114 },
115 /// An exported table. Since only table with index 0 is currently supported
116 /// there is no explicit index.
117 Table,
118 /// Exported memory. Since only memory with index 0 is currently supported
119 /// there is no explicit index.
120 Memory,
121 /// An exported global.
122 Global {
123 index: GlobalIndex,
124 },
125}
126
127#[derive(Debug)]
128/// An exported item.
129pub struct Export {
130 /// Name of the exported item.
131 pub name: Name,
132 /// And its type.
133 pub description: ExportDescription,
134}
135
136#[derive(Debug, Default)]
137/// The export section, which lists exported functions, globals, etc.
138///
139/// The Default instance of this type returns an empty section.
140pub struct ExportSection {
141 pub exports: Vec<Export>,
142}
143
/// Start sections are not supported, so this type has exactly one value, the
/// empty start section. That value is also what the Default instance returns.
#[derive(Debug, Default)]
pub struct StartSection {}
149
150#[derive(Debug)]
151/// An element description, describing how to initialize the table.
152/// The table index 0 is implicit, so we don't record it in the struct.
153pub struct Element {
154 /// The offset to start the initialization.
155 pub offset: i32,
156 /// Functions to define in the table, starting at the offset.
157 pub inits: Vec<FuncIndex>,
158}
159
160#[derive(Debug, Default)]
161/// The element section, which is used to initialize the function table.
162///
163/// The Default instance of this produces an empty Element section.
164pub struct ElementSection {
165 pub elements: Vec<Element>,
166}
167
/// The initial global value with its type.
/// Because imported globals are not allowed, the initialization expression
/// must consist of a single constant value of the right type, which is
/// short-circuited into the single constant.
#[derive(Debug, Copy, Clone)]
pub enum GlobalInit {
    /// A 32-bit integer constant.
    I32(i32),
    /// A 64-bit integer constant.
    I64(i64),
}
177
178impl From<GlobalInit> for i64 {
179 fn from(g: GlobalInit) -> Self {
180 match g {
181 GlobalInit::I64(x) => x,
182 GlobalInit::I32(x) => i64::from(x),
183 }
184 }
185}
186
187impl From<&Global> for ValueType {
188 fn from(g: &Global) -> Self {
189 match g.init {
190 GlobalInit::I32(_) => ValueType::I32,
191 GlobalInit::I64(_) => ValueType::I64,
192 }
193 }
194}
195
196impl GlobalInit {
197 /// Type of this global
198 pub fn ty(self) -> ValueType {
199 match self {
200 GlobalInit::I32(_) => ValueType::I32,
201 GlobalInit::I64(_) => ValueType::I64,
202 }
203 }
204}
205
206#[derive(Debug)]
207/// A single Global declaration, with initial value.
208pub struct Global {
209 /// The type of the value with the initial value.
210 pub init: GlobalInit,
211 pub mutable: bool,
212}
213
214#[derive(Debug, Default)]
215/// The list of globals declared in the module.
216///
217/// The Default instance of this type returns an empty section.
218pub struct GlobalSection {
219 pub globals: Vec<Global>,
220}
221
222impl GlobalSection {
223 pub fn get(&self, idx: GlobalIndex) -> Option<&Global> { self.globals.get(idx as usize) }
224}
225
226#[derive(Debug, Clone, Copy)]
227/// A local variable declaration in a function.
228pub struct Local {
229 /// The number of variables of this type.
230 pub multiplicity: u32,
231 /// The type of the local.
232 pub ty: ValueType,
233}
234
235#[derive(Debug)]
236/// The body of a function.
237pub struct Code {
238 /// Type of the function, this is added here to avoid more error cases.
239 /// in processing (e.g., after validation we know that the number of code
240 /// and function sections match).
241 pub ty: Rc<FunctionType>,
242 /// Type index carried over from the source. This should match the ty type
243 /// above.
244 pub ty_idx: TypeIndex,
245 /// The number of locals of a function. NB: This includes parameters and
246 /// locals declared inside the function.
247 pub num_locals: u32,
248 /// Declaration of the locals. This does not include parameters.
249 pub locals: Vec<Local>,
250 /// And a sequence of instructions.
251 pub expr: Expression,
252}
253
254#[derive(Debug, Default)]
255/// The code section, which contains the code of functions declared in the
256/// module.
257///
258/// The Default instance of this type returns an empty code section.
259pub struct CodeSection {
260 pub impls: Vec<Code>,
261}
262
/// The initialization of memory. The memory index is implicitly 0.
#[derive(Debug)]
pub struct Data {
    /// Where to start initializing.
    pub offset: i32,
    /// The bytes to initialize with.
    pub init: Vec<u8>,
}
271
272#[derive(Debug, Default)]
273/// The data section, this is used to initialize linear memory upon start of
274/// execution.
275///
276/// The Default instance of this type returns an empty data section.
277pub struct DataSection {
278 pub sections: Vec<Data>,
279}
280
281#[derive(Debug, Default)]
282/// The memory section, which contains declarations of linear memories. The Wasm
283/// version we support only supports at most one memory section.
284///
285/// The Default instance for this type produces an empty memory section.
286pub struct MemorySection {
287 /// Since we only support the memory with index 0 we use an Option as
288 /// opposed to a vector. In the version of Wasm we support
289 pub memory_type: Option<MemoryType>,
290}
291
292#[derive(Debug)]
293/// A processed custom section. By specification all custom sections have a
294/// name, followed by uninterpreted bytes.
295pub struct CustomSection<'a> {
296 pub name: Name,
297 pub contents: &'a [u8],
298}
299
300#[derive(Debug, Default)]
301/// The type section, which contains type definitions.
302///
303/// The default instance for type produces an empty type section.
304pub struct TypeSection {
305 /// A list of types. We use an Rc here so that we can avoid cloning the
306 /// FunctionType, which could be used to use-up resources when we
307 /// add this type to each of the code sections.
308 pub types: Vec<Rc<FunctionType>>,
309}
310
311impl TypeSection {
312 pub fn get(&self, idx: TypeIndex) -> Option<&Rc<FunctionType>> { self.types.get(idx as usize) }
313}
314
315#[derive(Debug)]
316/// A parsed Wasm module. This no longer has custom sections since they are not
317/// needed for further processing.
318pub struct Module {
319 pub ty: TypeSection,
320 pub import: ImportSection,
321 pub func: FunctionSection,
322 pub table: TableSection,
323 pub memory: MemorySection,
324 pub global: GlobalSection,
325 pub export: ExportSection,
326 pub start: StartSection,
327 pub element: ElementSection,
328 pub code: CodeSection,
329 pub data: DataSection,
330}
331
/// A size of the stack.
pub type StackSize = u64;
/// A number of operands on the stack.
pub type StackHeight = u64;
335
/// Index of a type.
pub type TypeIndex = u32;
/// Index of a function.
pub type FuncIndex = u32;
/// Index of a table.
pub type TableIndex = u32;
/// Index of a memory.
pub type MemIndex = u32;
/// Index of a global.
pub type GlobalIndex = u32;
/// Index of a local.
pub type LocalIndex = u32;
/// Index of a label.
pub type LabelIndex = u32;
344
/// Supported Wasm value types (i.e., no floats). A very low-level encoding is
/// used, which is relied upon to remove some needless allocations. In
/// particular the tags must be as specified by the Wasm specification and must
/// match the binary serialization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueType {
    /// 32-bit integer, tag `0x7F`.
    I32 = 0x7F,
    /// 64-bit integer, tag `0x7E`.
    I64 = 0x7E,
}
355
356/// Try to decode a value type from a single byte, the bytes being as specified
357/// by the Wasm specification.
358impl TryFrom<u8> for ValueType {
359 type Error = anyhow::Error;
360
361 fn try_from(value: u8) -> Result<Self, Self::Error> {
362 match value {
363 0x7F => Ok(ValueType::I32),
364 0x7E => Ok(ValueType::I64),
365 _ => bail!("Unknown value type byte {:#04x}", value),
366 }
367 }
368}
369
370/// Try to decode a value type from a single byte, the bytes being as specified
371/// by the Wasm specification.
372impl From<ValueType> for u8 {
373 fn from(from: ValueType) -> Self {
374 match from {
375 ValueType::I32 => 0x7F,
376 ValueType::I64 => 0x7E,
377 }
378 }
379}
380
381#[derive(Debug, Clone, Copy, Eq, PartialEq)]
382/// We only support the empty block type and a single value type. Type indices
383/// are not supported in the MVP version of Wasm.
384pub enum BlockType {
385 EmptyType,
386 ValueType(ValueType),
387}
388
389impl From<Option<ValueType>> for BlockType {
390 fn from(opt: Option<ValueType>) -> Self {
391 match opt {
392 Some(x) => BlockType::ValueType(x),
393 None => BlockType::EmptyType,
394 }
395 }
396}
397
398impl BlockType {
399 #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
400 pub fn is_empty(self) -> bool {
401 match self {
402 BlockType::EmptyType => true,
403 BlockType::ValueType(_) => false,
404 }
405 }
406}
407
408#[derive(Debug, Copy, Clone)]
409pub struct TableType {
410 pub limits: Limits,
411}
412
413#[derive(Debug, Clone, Copy)]
414pub struct MemoryType {
415 pub limits: Limits,
416}
417
/// The immediate memory argument. Since all operations are on memory index 0
/// the index is implicit.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct MemArg {
    /// The offset into the linear memory.
    pub offset: u32,
    /// Alignment. This is ignored by the Wasm semantics, but may be used as a
    /// hint. It is simply ignored here.
    pub align: u32,
}
428
/// Limits, consisting of a minimum and an optional maximum.
#[derive(Debug, Copy, Clone)]
pub struct Limits {
    /// The minimum.
    pub min: u32,
    /// The maximum, if one is given.
    pub max: Option<u32>,
}
434
435#[derive(Debug, Clone, PartialEq, Eq)]
436/// A function type with at most one return value. The MVP version of Wasm does
437/// not support multiple return values, and thus we don't either.
438pub struct FunctionType {
439 pub parameters: Vec<ValueType>,
440 pub result: Option<ValueType>,
441}
442
443impl FunctionType {
444 /// A function type with no arguments and no results.
445 pub fn empty() -> Self {
446 Self {
447 parameters: Vec::new(),
448 result: None,
449 }
450 }
451}
452
453/// A sequence of instructions.
454pub type InstrSeq = Vec<OpCode>;
455
456/// An expression is a sequence of instructions followed by the "end" delimiter,
457/// which is also present in the binary format (see 5.4.6).
458#[derive(Debug, Default, From)]
459pub struct Expression {
460 pub instrs: InstrSeq,
461}
462
463#[derive(Clone, Debug, Eq, PartialEq)]
464/// See [Wasm spec](https://webassembly.github.io/spec/core/syntax/instructions.html) for details of the meaning of the instructions.
465pub enum OpCode {
466 // Control instructions
467 End,
468 Nop,
469 Unreachable,
470 Block(BlockType),
471 Loop(BlockType),
472 If {
473 ty: BlockType,
474 },
475 Else,
476 Br(LabelIndex),
477 BrIf(LabelIndex),
478 BrTable {
479 labels: Vec<LabelIndex>,
480 default: LabelIndex,
481 },
482 Return,
483 Call(FuncIndex),
484 CallIndirect(TypeIndex),
485
486 // Parametric instructions
487 Drop,
488 Select,
489
490 // Variable instructions
491 LocalGet(LocalIndex),
492 LocalSet(LocalIndex),
493 LocalTee(LocalIndex),
494 GlobalGet(GlobalIndex),
495 GlobalSet(GlobalIndex),
496
497 // Memory instructions
498 I32Load(MemArg),
499 I64Load(MemArg),
500 I32Load8S(MemArg),
501 I32Load8U(MemArg),
502 I32Load16S(MemArg),
503 I32Load16U(MemArg),
504 I64Load8S(MemArg),
505 I64Load8U(MemArg),
506 I64Load16S(MemArg),
507 I64Load16U(MemArg),
508 I64Load32S(MemArg),
509 I64Load32U(MemArg),
510 I32Store(MemArg),
511 I64Store(MemArg),
512 I32Store8(MemArg),
513 I32Store16(MemArg),
514 I64Store8(MemArg),
515 I64Store16(MemArg),
516 I64Store32(MemArg),
517 MemorySize,
518 MemoryGrow,
519
520 // Numeric instructions
521 I32Const(i32),
522 I64Const(i64),
523
524 I32Eqz,
525 I32Eq,
526 I32Ne,
527 I32LtS,
528 I32LtU,
529 I32GtS,
530 I32GtU,
531 I32LeS,
532 I32LeU,
533 I32GeS,
534 I32GeU,
535 I64Eqz,
536 I64Eq,
537 I64Ne,
538 I64LtS,
539 I64LtU,
540 I64GtS,
541 I64GtU,
542 I64LeS,
543 I64LeU,
544 I64GeS,
545 I64GeU,
546
547 I32Clz,
548 I32Ctz,
549 I32Popcnt,
550 I32Add,
551 I32Sub,
552 I32Mul,
553 I32DivS,
554 I32DivU,
555 I32RemS,
556 I32RemU,
557 I32And,
558 I32Or,
559 I32Xor,
560 I32Shl,
561 I32ShrS,
562 I32ShrU,
563 I32Rotl,
564 I32Rotr,
565 I64Clz,
566 I64Ctz,
567 I64Popcnt,
568 I64Add,
569 I64Sub,
570 I64Mul,
571 I64DivS,
572 I64DivU,
573 I64RemS,
574 I64RemU,
575 I64And,
576 I64Or,
577 I64Xor,
578 I64Shl,
579 I64ShrS,
580 I64ShrU,
581 I64Rotl,
582 I64Rotr,
583
584 I32WrapI64,
585 I64ExtendI32S,
586 I64ExtendI32U,
587
588 // Sign extension instructions. Optionally supported
589 // depending on the protocol version.
590 I32Extend8S,
591 I32Extend16S,
592 I64Extend8S,
593 I64Extend16S,
594 I64Extend32S,
595
596 // Extra instructions that are not part of the Wasm spec.
597 // They are part of the OpCode data type regardless to simplify
598 // program transformations, chiefly the metering transformation.
599 // Ideally we'd have a type parameter to OpCode, or a different data type,
600 // but that creates more overhead than what is worth with the existing codebase.
601 // So we add it here to the type, and make sure to not produce it when parsing
602 // and disallow it during initial module validation.
603 TickEnergy(u32),
604}