1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
//! AST definition of Wasm modules, as well as supporting datatypes.
//! Based on the [W3C Wasm specification](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-import)
//!
//! The main type is [Module](./struct.Module.html), which defines a Wasm
//! module, either validated or not validated. Some of the properties that are
//! stated as validity conditions in the Wasm specification are already
//! guaranteed automatically by the AST definition of the Module, and the
//! parsing functions.

use anyhow::bail;
use derive_more::{Display, From};
use std::{convert::TryFrom, rc::Rc};

#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Clone, Display)]
/// A WebAssembly name. We choose to have it be an owned value rather than a
/// reference into the original module. Names are also used in the parsed AST,
/// and we don't want to retain references to the original bytes just because of
/// a few names.
///
/// Comparison and ordering are derived, and are thus those of the underlying
/// string. The `Display` implementation prints the name itself.
#[display(fmt = "{}", name)]
pub struct Name {
    /// Names in Wasm are UTF-8 encoded, hence the use of a `String`.
    pub name: String,
}

/// View the name as a borrowed string slice.
impl AsRef<str> for Name {
    fn as_ref(&self) -> &str { self.name.as_str() }
}

/// Construct an owned name by copying the contents of the given string slice.
impl<'a> From<&'a str> for Name {
    fn from(s: &'a str) -> Self {
        let name = String::from(s);
        Self {
            name,
        }
    }
}

/// Borrow the name as a `str`. This is what the standard collections require
/// in order to look up an entry keyed by a `Name` using only a `&str`.
impl std::borrow::Borrow<str> for Name {
    fn borrow(&self) -> &str { self.name.as_str() }
}

#[derive(Debug)]
/// A single import description, i.e., the kind of the item being imported.
pub enum ImportDescription {
    /// Import a function with the given type. The other import types, Table,
    /// Memory, Global, are not supported by Concordium.
    Func {
        /// Index of the type of the imported function.
        /// NOTE(review): presumably an index into the type section — confirm.
        type_idx: TypeIndex,
    },
}

#[derive(Debug)]
/// Import of an item from another module.
pub struct Import {
    /// The name of the module the item is imported from.
    pub mod_name:    Name,
    /// The name of the item that is to be imported.
    pub item_name:   Name,
    /// The description of the item, i.e., what kind of item is imported.
    pub description: ImportDescription,
}

impl Import {
    /// Return whether the import is a function. Since functions are the only
    /// kind of import that can be represented, this holds for every import.
    pub fn is_func(&self) -> bool {
        // Irrefutable pattern on purpose: if another kind of import is ever
        // added this stops compiling, forcing this method to be revisited.
        let ImportDescription::Func {
            ..
        } = self.description;
        true
    }
}

#[derive(Debug, Default)]
/// The import section. This lists imports in the module.
///
/// The Default instance for this type produces an empty section.
pub struct ImportSection {
    /// The imports declared by the module.
    pub imports: Vec<Import>,
}

#[derive(Debug, Default)]
/// The function section. This contains types of declared functions.
///
/// The Default instance for this type produces an empty function section.
pub struct FunctionSection {
    /// The type indices of the declared functions.
    pub types: Vec<TypeIndex>,
}

impl FunctionSection {
    /// Look up the type index stored at position `idx` of the section.
    /// Returns `None` if `idx` is out of bounds.
    pub fn get(&self, idx: FuncIndex) -> Option<TypeIndex> {
        let position = idx as usize;
        self.types.get(position).copied()
    }
}

#[derive(Debug, Default)]
/// The table section. The table is a list of functions and is used to support
/// indirect function calls.
///
/// The Default instance for this type produces an empty table section.
pub struct TableSection {
    /// We only support at most one table for now, as in Wasm MVP, hence an
    /// Option as opposed to a vector. `None` means that no table is declared.
    ///
    /// If present, the table type limits ensure that min <= 2^16.
    pub table_type: Option<TableType>,
}

#[derive(Debug)]
/// An exported item description. Since it is inconsequential whether extra
/// definitions are exported we allow all of them to be flexible with respect to
/// the external tooling.
pub enum ExportDescription {
    /// An exported function with the given index.
    Func {
        /// Index of the exported function.
        index: FuncIndex,
    },
    /// An exported table. Since only table with index 0 is currently supported
    /// there is no explicit index.
    Table,
    /// Exported memory. Since only memory with index 0 is currently supported
    /// there is no explicit index.
    Memory,
    /// An exported global.
    Global {
        /// Index of the exported global.
        index: GlobalIndex,
    },
}

#[derive(Debug)]
/// An exported item.
pub struct Export {
    /// Name of the exported item.
    pub name:        Name,
    /// The kind of the exported item, together with its index where one is
    /// needed to identify it.
    pub description: ExportDescription,
}

#[derive(Debug, Default)]
/// The export section, which lists exported functions, globals, etc.
///
/// The Default instance of this type returns an empty section.
pub struct ExportSection {
    /// The exports declared by the module.
    pub exports: Vec<Export>,
}

#[derive(Debug, Default)]
/// The start section. We do not support start sections, so this type has
/// exactly one value, the empty start section, which is of course also the
/// value returned by the Default instance.
pub struct StartSection {}

#[derive(Debug)]
/// An element description, describing how to initialize the table.
/// The table index 0 is implicit, so we don't record it in the struct.
pub struct Element {
    /// The offset in the table at which to start the initialization.
    /// NOTE(review): stored as a plain `i32`; presumably the value of a
    /// constant offset expression in the source module — confirm.
    pub offset: i32,
    /// Functions to define in the table, starting at the offset.
    pub inits:  Vec<FuncIndex>,
}

#[derive(Debug, Default)]
/// The element section, which is used to initialize the function table.
///
/// The Default instance of this produces an empty Element section.
pub struct ElementSection {
    /// The element descriptions declared by the module.
    pub elements: Vec<Element>,
}

#[derive(Debug, Copy, Clone)]
/// The initial global value with its type.
/// Because we do not allow imported globals, the initialization expression
/// must consist of a single constant value of the right type, which we
/// short-circuit into the single constant.
pub enum GlobalInit {
    /// A 32-bit integer constant.
    I32(i32),
    /// A 64-bit integer constant.
    I64(i64),
}

/// Extract the initial value as an `i64`. A 32-bit value is sign-extended.
impl From<GlobalInit> for i64 {
    fn from(g: GlobalInit) -> Self {
        match g {
            GlobalInit::I32(narrow) => narrow.into(),
            GlobalInit::I64(wide) => wide,
        }
    }
}

impl From<&Global> for ValueType {
    fn from(g: &Global) -> Self {
        match g.init {
            GlobalInit::I32(_) => ValueType::I32,
            GlobalInit::I64(_) => ValueType::I64,
        }
    }
}

impl GlobalInit {
    /// The value type of this initial value, and hence of the global that it
    /// initializes.
    pub fn ty(self) -> ValueType {
        match self {
            Self::I32(_) => ValueType::I32,
            Self::I64(_) => ValueType::I64,
        }
    }
}

#[derive(Debug)]
/// A single Global declaration, with initial value.
pub struct Global {
    /// The initial value. This also determines the type of the global.
    pub init:    GlobalInit,
    /// Whether the global is declared as mutable.
    pub mutable: bool,
}

#[derive(Debug, Default)]
/// The list of globals declared in the module.
///
/// The Default instance of this type returns an empty section.
pub struct GlobalSection {
    /// The declared globals.
    pub globals: Vec<Global>,
}

impl GlobalSection {
    /// Look up the global stored at position `idx` of the section.
    /// Returns `None` if `idx` is out of bounds.
    pub fn get(&self, idx: GlobalIndex) -> Option<&Global> {
        let position = idx as usize;
        self.globals.get(position)
    }
}

#[derive(Debug, Clone, Copy)]
/// A local variable declaration in a function. A single declaration introduces
/// `multiplicity` locals, all of the same type.
pub struct Local {
    /// The number of variables of this type.
    pub multiplicity: u32,
    /// The type of the local.
    pub ty:           ValueType,
}

#[derive(Debug)]
/// The body of a function.
pub struct Code {
    /// Type of the function. This is added here to avoid more error cases
    /// in processing (e.g., after validation we know that the number of code
    /// and function sections match).
    pub ty:         Rc<FunctionType>,
    /// Type index carried over from the source. This should match the ty type
    /// above.
    pub ty_idx:     TypeIndex,
    /// The number of locals of a function. NB: This includes parameters and
    /// locals declared inside the function.
    pub num_locals: u32,
    /// Declaration of the locals. This does not include parameters.
    pub locals:     Vec<Local>,
    /// The sequence of instructions making up the body.
    pub expr:       Expression,
}

#[derive(Debug, Default)]
/// The code section, which contains the code of functions declared in the
/// module.
///
/// The Default instance of this type returns an empty code section.
pub struct CodeSection {
    /// The bodies of the declared functions.
    pub impls: Vec<Code>,
}

#[derive(Debug)]
/// The initialization of memory. The memory index is implicitly 0.
pub struct Data {
    /// Where to start initializing.
    /// NOTE(review): stored as a plain `i32`; presumably the value of a
    /// constant offset expression in the source module — confirm.
    pub offset: i32,
    /// The bytes to initialize with.
    pub init:   Vec<u8>,
}

#[derive(Debug, Default)]
/// The data section, this is used to initialize linear memory upon start of
/// execution.
///
/// The Default instance of this type returns an empty data section.
pub struct DataSection {
    /// The individual memory initializations.
    pub sections: Vec<Data>,
}

#[derive(Debug, Default)]
/// The memory section, which contains declarations of linear memories. The Wasm
/// version we support only supports at most one memory section.
///
/// The Default instance for this type produces an empty memory section.
pub struct MemorySection {
    /// Since we only support the memory with index 0 we use an Option as
    /// opposed to a vector. In the version of Wasm we support there is at
    /// most one memory. `None` means that no memory is declared.
    pub memory_type: Option<MemoryType>,
}

#[derive(Debug)]
/// A processed custom section. By specification all custom sections have a
/// name, followed by uninterpreted bytes.
pub struct CustomSection<'a> {
    /// The name of the custom section.
    pub name:     Name,
    /// The uninterpreted contents of the section, borrowed rather than copied.
    /// NOTE(review): presumably a slice of the original module bytes — confirm.
    pub contents: &'a [u8],
}

#[derive(Debug, Default)]
/// The type section, which contains type definitions.
///
/// The Default instance for this type produces an empty type section.
pub struct TypeSection {
    /// A list of types. We use an Rc here so that we can avoid cloning the
    /// FunctionType, which could be used to use up resources when we
    /// add this type to each of the code sections.
    pub types: Vec<Rc<FunctionType>>,
}

impl TypeSection {
    /// Look up the function type stored at position `idx` of the section.
    /// Returns `None` if `idx` is out of bounds.
    pub fn get(&self, idx: TypeIndex) -> Option<&Rc<FunctionType>> {
        let position = idx as usize;
        self.types.get(position)
    }
}

#[derive(Debug)]
/// A parsed Wasm module. This no longer has custom sections since they are not
/// needed for further processing.
pub struct Module {
    /// The type section, with the function types used by the module.
    pub ty:      TypeSection,
    /// The import section.
    pub import:  ImportSection,
    /// The function section, with the types of declared functions.
    pub func:    FunctionSection,
    /// The table section, with at most one table.
    pub table:   TableSection,
    /// The memory section, with at most one memory.
    pub memory:  MemorySection,
    /// The global section.
    pub global:  GlobalSection,
    /// The export section.
    pub export:  ExportSection,
    /// The start section, which is always empty.
    pub start:   StartSection,
    /// The element section, used to initialize the table.
    pub element: ElementSection,
    /// The code section, with the bodies of declared functions.
    pub code:    CodeSection,
    /// The data section, used to initialize the memory.
    pub data:    DataSection,
}

/// A size of the stack.
/// NOTE(review): the unit is not visible from this definition — confirm
/// against the users of this type.
pub type StackSize = u64;
/// A number of operands on the stack.
pub type StackHeight = u64;

// Indices of the different kinds of items. All of them are 32-bit unsigned
// integers.

/// Index of a function type.
pub type TypeIndex = u32;
/// Index of a function.
pub type FuncIndex = u32;
/// Index of a table.
pub type TableIndex = u32;
/// Index of a linear memory.
pub type MemIndex = u32;
/// Index of a global.
pub type GlobalIndex = u32;
/// Index of a local of a function.
pub type LocalIndex = u32;
/// Index of a label, as used by the branching instructions.
pub type LabelIndex = u32;

/// Supported Wasm value types (i.e., no floats). We use a very low-level
/// encoding which we make use of to remove some needless allocations. In
/// particular the tags must be as specified by the Wasm specification and must
/// match the binary serialization.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum ValueType {
    /// 32-bit integer, with tag `0x7F`.
    I32 = 0x7F,
    /// 64-bit integer, with tag `0x7E`.
    I64 = 0x7E,
}

/// Decode a value type from its single-byte tag, the tags being as specified
/// by the Wasm specification. Any byte that is not the tag of a supported
/// value type results in an error.
impl TryFrom<u8> for ValueType {
    type Error = anyhow::Error;

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        let decoded = match value {
            0x7F => ValueType::I32,
            0x7E => ValueType::I64,
            // `bail!` returns early with the error.
            _ => bail!("Unknown value type byte {:#04x}", value),
        };
        Ok(decoded)
    }
}

/// Try to decode a value type from a single byte, the bytes being as specified
/// by the Wasm specification.
impl From<ValueType> for u8 {
    fn from(from: ValueType) -> Self {
        match from {
            ValueType::I32 => 0x7F,
            ValueType::I64 => 0x7E,
        }
    }
}

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
/// We only support the empty block type and a single value type. Type indices
/// are not supported in the MVP version of Wasm.
pub enum BlockType {
    /// The empty block type.
    EmptyType,
    /// A block type consisting of a single value type.
    ValueType(ValueType),
}

/// An absent value type corresponds to the empty block type, and a present one
/// to the block type with that single value type.
impl From<Option<ValueType>> for BlockType {
    fn from(opt: Option<ValueType>) -> Self {
        opt.map_or(BlockType::EmptyType, BlockType::ValueType)
    }
}

impl BlockType {
    /// Whether this is the empty block type, as opposed to a block type with a
    /// single value type.
    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
    pub fn is_empty(self) -> bool { matches!(self, BlockType::EmptyType) }
}

#[derive(Debug, Copy, Clone)]
/// The type of a table, which consists solely of its limits.
pub struct TableType {
    /// The limits of the table.
    pub limits: Limits,
}

#[derive(Debug, Clone, Copy)]
/// The type of a linear memory, which consists solely of its limits.
pub struct MemoryType {
    /// The limits of the memory.
    pub limits: Limits,
}

#[derive(Debug, Clone, Eq, PartialEq)]
/// The immediate memory argument. Since all operations are on memory index 0
/// the index is implicit.
pub struct MemArg {
    /// The offset into the linear memory.
    pub offset: u32,
    /// Alignment. This is ignored by the Wasm semantics, but may be used as a
    /// hint. We will simply ignore it.
    pub align:  u32,
}

#[derive(Debug, Copy, Clone)]
/// The limits of a table or a memory, consisting of a mandatory minimum and an
/// optional maximum.
pub struct Limits {
    /// The minimum.
    pub min: u32,
    /// The maximum, if one is specified.
    pub max: Option<u32>,
}

#[derive(Debug, Clone, PartialEq, Eq)]
/// A function type with at most one return value. The MVP version of Wasm does
/// not support multiple return values, and thus we don't either.
pub struct FunctionType {
    /// The types of the parameters, in order.
    pub parameters: Vec<ValueType>,
    /// The type of the return value, or `None` if the function returns
    /// nothing.
    pub result:     Option<ValueType>,
}

impl FunctionType {
    /// The type of a function that takes no arguments and returns no result.
    pub fn empty() -> Self {
        let parameters = Vec::new();
        let result = None;
        Self {
            parameters,
            result,
        }
    }
}

/// A sequence of instructions, in the order in which they appear.
pub type InstrSeq = Vec<OpCode>;

/// An expression is a sequence of instructions followed by the "end" delimiter,
/// which is also present in the binary format (see 5.4.6).
///
/// The Default instance produces an expression with no instructions, and an
/// expression can be constructed directly from an instruction sequence via the
/// derived `From` instance.
#[derive(Debug, Default, From)]
pub struct Expression {
    /// The instructions of the expression.
    /// NOTE(review): whether the final "end" delimiter is stored here is not
    /// visible from this definition — confirm against the parser.
    pub instrs: InstrSeq,
}

#[derive(Clone, Debug, Eq, PartialEq)]
/// The supported instructions. Only instructions on 32-bit and 64-bit integers
/// are present, there are no floating point instructions.
///
/// See [Wasm spec](https://webassembly.github.io/spec/core/syntax/instructions.html) for details of the meaning of the instructions.
pub enum OpCode {
    // Control instructions
    End,
    Nop,
    Unreachable,
    Block(BlockType),
    Loop(BlockType),
    If {
        /// The block type of the conditional.
        ty: BlockType,
    },
    Else,
    Br(LabelIndex),
    BrIf(LabelIndex),
    BrTable {
        /// The table of labels to choose the branch target from.
        labels:  Vec<LabelIndex>,
        /// The label to branch to when no entry of `labels` is selected.
        default: LabelIndex,
    },
    Return,
    /// Call the function with the given index.
    Call(FuncIndex),
    /// Indirect call via the table. The immediate is the index of a function
    /// type.
    CallIndirect(TypeIndex),

    // Parametric instructions
    Drop,
    Select,

    // Variable instructions
    LocalGet(LocalIndex),
    LocalSet(LocalIndex),
    LocalTee(LocalIndex),
    GlobalGet(GlobalIndex),
    GlobalSet(GlobalIndex),

    // Memory instructions
    // All of these operate on the memory with index 0, which is implicit.
    I32Load(MemArg),
    I64Load(MemArg),
    I32Load8S(MemArg),
    I32Load8U(MemArg),
    I32Load16S(MemArg),
    I32Load16U(MemArg),
    I64Load8S(MemArg),
    I64Load8U(MemArg),
    I64Load16S(MemArg),
    I64Load16U(MemArg),
    I64Load32S(MemArg),
    I64Load32U(MemArg),
    I32Store(MemArg),
    I64Store(MemArg),
    I32Store8(MemArg),
    I32Store16(MemArg),
    I64Store8(MemArg),
    I64Store16(MemArg),
    I64Store32(MemArg),
    MemorySize,
    MemoryGrow,

    // Numeric instructions
    // Constants
    I32Const(i32),
    I64Const(i64),

    // Tests and comparisons
    I32Eqz,
    I32Eq,
    I32Ne,
    I32LtS,
    I32LtU,
    I32GtS,
    I32GtU,
    I32LeS,
    I32LeU,
    I32GeS,
    I32GeU,
    I64Eqz,
    I64Eq,
    I64Ne,
    I64LtS,
    I64LtU,
    I64GtS,
    I64GtU,
    I64LeS,
    I64LeU,
    I64GeS,
    I64GeU,

    // Unary and binary operators
    I32Clz,
    I32Ctz,
    I32Popcnt,
    I32Add,
    I32Sub,
    I32Mul,
    I32DivS,
    I32DivU,
    I32RemS,
    I32RemU,
    I32And,
    I32Or,
    I32Xor,
    I32Shl,
    I32ShrS,
    I32ShrU,
    I32Rotl,
    I32Rotr,
    I64Clz,
    I64Ctz,
    I64Popcnt,
    I64Add,
    I64Sub,
    I64Mul,
    I64DivS,
    I64DivU,
    I64RemS,
    I64RemU,
    I64And,
    I64Or,
    I64Xor,
    I64Shl,
    I64ShrS,
    I64ShrU,
    I64Rotl,
    I64Rotr,

    // Conversions between the 32-bit and 64-bit integer types
    I32WrapI64,
    I64ExtendI32S,
    I64ExtendI32U,
}