concordium_wasm/
validate.rs

//! Utilities for Wasm module validation.
//!
//! The specification that is taken as the basis is [wasm-core-1-20191205](https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/),
//! but we have further restrictions to accommodate our use on the chain.
//! Some of these are already ensured by parsing, others are ensured by
//! validation.
//!
//! The basic code validation algorithm used here is a straightforward transcript
//! of the validation algorithm described in the appendix of the linked Wasm
//! specification.
11
12use crate::{
13    constants::*,
14    parse::{
15        parse_custom, parse_sec_with_default, CodeSkeletonSection, InstructionValidationContext,
16        OpCodeIterator, ParseResult, Skeleton, EMPTY_CTX,
17    },
18    types::*,
19};
20use anyhow::{anyhow, bail, ensure};
21use std::{borrow::Borrow, collections::BTreeSet, convert::TryInto, rc::Rc};
22
/// Structured errors produced by validation, as opposed to the free-form
/// messages used for most validation failures.
#[derive(Debug)]
pub enum ValidationError {
    /// A function has more locals than the limit it was checked against.
    TooManyLocals {
        /// The number of locals that was found.
        actual: u32,
        /// The limit that `actual` exceeded.
        max:    u32,
    },
}

impl std::fmt::Display for ValidationError {
    /// Render the error as a human-readable sentence.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::TooManyLocals {
                actual,
                max,
            } => {
                write!(f, "The number of locals ({}) is more than allowed ({}).", actual, max)
            }
        }
    }
}
41
42/// Result type of validation.
43pub type ValidateResult<A> = anyhow::Result<A>;
44
45#[derive(Debug, Default)]
46/// The operand stack containing either known or unknown types.
47/// Unknown types appear on the stack by the use of parametric instructions
48/// after an unreachable section of the code.
49///
50/// The default instance produces an empty operator stack.
51pub(crate) struct OperandStack {
52    pub(crate) stack: Vec<MaybeKnown>,
53}
54
55#[derive(Debug, Default)]
56/// The stack of "control frames". A control frame is a block of code, e.g., a
57/// `block ... end` section.
58///
59/// The default instance produces an empty control stack.
60pub(crate) struct ControlStack {
61    pub(crate) stack: Vec<ControlFrame>,
62}
63
64impl ControlStack {
65    /// Get the n-th element of the stack, starting at 0.
66    pub fn get(&self, n: u32) -> Option<&ControlFrame> {
67        let n = n as usize;
68        if n >= self.stack.len() {
69            None
70        } else {
71            self.stack.get(self.stack.len() - n - 1)
72        }
73    }
74
75    /// Get the label type of the `n`-th label. This is the type
76    /// that is used when jumping to the label.
77    pub fn get_label(&self, n: u32) -> Option<BlockType> {
78        self.get(n).map(|frame| frame.label_type)
79    }
80
81    /// Get the outermost frame, target of the return jump.
82    pub fn outermost(&self) -> Option<&ControlFrame> { self.stack.first() }
83}
84
85#[derive(Debug)]
86/// A single control frame. This indicates what the types are for jumping to the
87/// label of this block, or normally exiting the block, as well as some metadata
88/// with reference to the ControlStack
89pub(crate) struct ControlFrame {
90    /// Whether the current control frame is started by an if.
91    pub(crate) is_if:       bool,
92    /// Label type of the block, this is the type that is used when
93    /// jumping to the label of the block.
94    pub(crate) label_type:  BlockType,
95    /// end type of the block, this is the type that is used when
96    /// ending the block in a normal way.
97    pub(crate) end_type:    BlockType,
98    /// height of the stack at the entry of this block.
99    pub(crate) height:      usize,
100    /// whether we are in the unreachable part of this block or not.
101    /// the unreachable part is any part after an unconditional jump or
102    /// a trap instruction.
103    pub(crate) unreachable: bool,
104}
105
106#[derive(Debug)]
107/// The validation state contains the control frames and a stack of operands.
108/// this is the same state as described by the validation algorithm of the wasm
109/// specification appendix.
110pub struct ValidationState {
111    pub(crate) opds:                 OperandStack,
112    pub(crate) ctrls:                ControlStack,
113    /// Maximum reachable stack height.
114    pub(crate) max_reachable_height: usize,
115    /// The smallest index of a control frame that has entered unreachable
116    /// section, if any.
117    pub(crate) unreachable_section:  Option<usize>,
118}
119
120impl ValidationState {
121    /// Return whether the frame we are in currently is completely unreachable,
122    /// the frame is reachable but the instruction inside it is not, or the
123    /// instruction is reachable.
124    pub fn reachability(&self) -> Reachability {
125        let Some(idx) = self.unreachable_section else {
126            return Reachability::Reachable
127        };
128        if idx + 1 < self.ctrls.stack.len() {
129            Reachability::UnreachableFrame
130        } else {
131            Reachability::UnreachableInstruction
132        }
133    }
134
135    /// Check whether we are done, meaning that the control stack is
136    /// exhausted.
137    pub fn done(&self) -> bool { self.ctrls.stack.is_empty() }
138}
139
140#[derive(Eq, PartialEq, Debug, Clone, Copy)]
141/// A possibly known type. Unknown types appear on the stack after
142/// we enter an unreachable part of the code. This part must still be
143/// type-checked, but the stack at that point is arbitrary.
144pub(crate) enum MaybeKnown {
145    Unknown,
146    Known(ValueType),
147}
148
149use MaybeKnown::*;
150
151impl MaybeKnown {
152    pub(crate) fn is_unknown(self) -> bool { self == MaybeKnown::Unknown }
153}
154
155impl ValidationState {
156    /// Push a new type to the stack.
157    fn push_opd(&mut self, m_type: MaybeKnown) {
158        self.opds.stack.push(m_type);
159        if matches!(
160            self.ctrls.stack.last(),
161            Some(ControlFrame {
162                unreachable: false,
163                ..
164            })
165        ) {
166            self.max_reachable_height =
167                std::cmp::max(self.max_reachable_height, self.opds.stack.len());
168        }
169    }
170
171    /// Pop a type from the stack and, if successful, return it.
172    fn pop_opd(&mut self) -> ValidateResult<MaybeKnown> {
173        match self.ctrls.stack.last() {
174            None => bail!("Control frame exhausted."),
175            Some(frame) => {
176                if self.opds.stack.len() == frame.height {
177                    if frame.unreachable {
178                        Ok(Unknown)
179                    } else {
180                        bail!("Operand stack exhausted for the current block.")
181                    }
182                } else {
183                    self.opds
184                        .stack
185                        .pop()
186                        .ok_or_else(|| anyhow!("Stack exhausted, should not happen."))
187                }
188            }
189        }
190    }
191
192    /// Pop an operand from the stack, checking that it is as expected.
193    ///
194    /// If successful, return the more precise type of the two, expected and
195    /// actual. The type in the stack can be marked as unknown if it is in
196    /// an unreachable part of the code.
197    fn pop_expect_opd(&mut self, expect: MaybeKnown) -> ValidateResult<MaybeKnown> {
198        let actual = self.pop_opd()?;
199        if actual.is_unknown() {
200            return Ok(expect);
201        }
202        if expect.is_unknown() {
203            return Ok(actual);
204        }
205        ensure!(
206            actual == expect,
207            "Actual type different from expected {:#?} /= {:#?}.",
208            actual,
209            expect
210        );
211        Ok(actual)
212    }
213
214    /// Push zero or one operands to the current stack.
215    fn push_opds(&mut self, tys: BlockType) {
216        if let BlockType::ValueType(ty) = tys {
217            self.push_opd(Known(ty))
218        }
219    }
220
221    /// Pop zero or one operands from the stack, and check that it
222    /// has expected type.
223    fn pop_opds(&mut self, expected: BlockType) -> ValidateResult<()> {
224        if let BlockType::ValueType(ty) = expected {
225            self.pop_expect_opd(Known(ty))?;
226        }
227        Ok(())
228    }
229
230    /// Push a new control frame with the given label and end types.
231    ///
232    /// The label type is the type that will be at the top of the stack
233    /// when a jump to this label is executed.
234    /// The end type is the type that is at the top of the stack when normal
235    /// execution of the block reaches its end.
236    ///
237    /// For blocks the label type and end type are the same, for loops the label
238    /// type is empty, and the end type is potentially not.
239    fn push_ctrl(&mut self, is_if: bool, label_type: BlockType, end_type: BlockType) {
240        let frame = ControlFrame {
241            is_if,
242            label_type,
243            end_type,
244            height: self.opds.stack.len(),
245            unreachable: false,
246        };
247        self.ctrls.stack.push(frame)
248    }
249
250    /// Pop the current control frame and return its result type, together
251    /// with a flag signalling whether the frame was started with an `if`.
252    fn pop_ctrl(&mut self) -> ValidateResult<(BlockType, bool)> {
253        // We first check for the last element, and use it without removing it.
254        // This is so that pop_expect_opd, which pops elements from the stack, can see
255        // whether we are in the unreachable state for the stack or not.
256        match self.ctrls.stack.last().map(|frame| (frame.end_type, frame.height, frame.is_if)) {
257            None => bail!("Control stack exhausted."),
258            Some((end_type, height, opcode)) => {
259                if let BlockType::ValueType(ty) = end_type {
260                    self.pop_expect_opd(Known(ty))?;
261                }
262                ensure!(self.opds.stack.len() == height, "Operand stack not exhausted.");
263                // Finally pop after we've made sure the stack is properly cleared.
264                self.ctrls.stack.pop();
265                // If the just-popped control frame was the lowest one that was unreachable
266                // then we have entered reachable code section again.
267                if let Some(idx) = self.unreachable_section {
268                    if idx == self.ctrls.stack.len() {
269                        self.unreachable_section = None;
270                    } // otherwise remain in unreachable code.
271                }
272                Ok((end_type, opcode))
273            }
274        }
275    }
276
277    fn mark_unreachable(&mut self) -> ValidateResult<()> {
278        match self.ctrls.stack.last_mut() {
279            None => bail!("Control stack exhausted."),
280            Some(frame) => {
281                self.opds.stack.truncate(frame.height);
282                frame.unreachable = true;
283                let last_idx = self.ctrls.stack.len() - 1;
284                if let Some(idx) = self.unreachable_section {
285                    self.unreachable_section = Some(std::cmp::min(idx, last_idx));
286                // -1 is safe since we know the stack is inhabited
287                } else {
288                    self.unreachable_section = Some(last_idx);
289                }
290                Ok(())
291            }
292        }
293    }
294}
295
296/// The local types, at indices start, start+1,..<end (not including end).
297pub(crate) struct LocalsRange {
298    pub(crate) start: LocalIndex,
299    pub(crate) end:   LocalIndex,
300    pub(crate) ty:    ValueType,
301}
302
303/// Context for the validation of a function.
304pub(crate) struct FunctionContext<'a> {
305    pub(crate) return_type: BlockType,
306    pub(crate) globals:     &'a [Global],
307    pub(crate) funcs:       &'a [TypeIndex],
308    pub(crate) types:       &'a [Rc<FunctionType>],
309    pub(crate) locals:      Vec<LocalsRange>,
310    // Whether memory exists or not.
311    pub(crate) memory:      bool,
312    // Whether the table exists or not.
313    pub(crate) table:       bool,
314}
315
316/// Make a locals structure used to validate a function body.
317/// This function additionally ensures that there are no more than
318/// ALLOWED_LOCALS local variables. Note that function parameters are included
319/// in locals.
320fn make_locals(ty: &FunctionType, locals: &[Local]) -> ValidateResult<(Vec<LocalsRange>, u32)> {
321    let mut out = Vec::with_capacity(ty.parameters.len() + locals.len());
322    let mut start = 0;
323    for &ty in ty.parameters.iter() {
324        let end = start + 1;
325        out.push(LocalsRange {
326            start,
327            end,
328            ty,
329        });
330        start = end;
331    }
332    for local in locals.iter() {
333        let end =
334            start.checked_add(local.multiplicity).ok_or_else(|| anyhow!("Too many locals"))?;
335        out.push(LocalsRange {
336            start,
337            end,
338            ty: local.ty,
339        });
340        start = end;
341    }
342    let num_locals = start;
343    ensure!(num_locals <= ALLOWED_LOCALS, ValidationError::TooManyLocals {
344        actual: num_locals,
345        max:    ALLOWED_LOCALS,
346    });
347    Ok((out, num_locals))
348}
349
350/// The trait used used to parametrize the validation algorithm so that it can
351/// be used for other applications than mere validation. In particular the
352/// validation algorithm maintains useful state during its run, e.g., current
353/// and maximum stack height, which is useful during compilation.
354pub trait HasValidationContext {
355    /// Get the local of a function at the given index.
356    /// Note that function parameters define implicit locals.
357    fn get_local(&self, idx: LocalIndex) -> ValidateResult<ValueType>;
358
359    /// Get a global together with its mutability. `true` for mutable, `false`
360    /// for constant.
361    fn get_global(&self, idx: GlobalIndex) -> ValidateResult<(ValueType, bool)>;
362
363    /// Return whether the module has memory.
364    fn memory_exists(&self) -> bool;
365
366    /// Return whether the module has the table.
367    fn table_exists(&self) -> bool;
368
369    /// Get the type of the function at the given index.
370    fn get_func(&self, idx: FuncIndex) -> ValidateResult<&Rc<FunctionType>>;
371
372    /// Get the type at the given index.
373    fn get_type(&self, idx: TypeIndex) -> ValidateResult<&Rc<FunctionType>>;
374
375    /// Return the return type of the function.
376    fn return_type(&self) -> BlockType;
377}
378
379impl<'a> HasValidationContext for FunctionContext<'a> {
380    fn get_local(&self, idx: LocalIndex) -> ValidateResult<ValueType> {
381        let res = self.locals.binary_search_by(|locals| {
382            if locals.end <= idx {
383                std::cmp::Ordering::Less
384            } else if idx < locals.start {
385                std::cmp::Ordering::Greater
386            } else {
387                std::cmp::Ordering::Equal
388            }
389        });
390        match res {
391            Ok(idx) => Ok(self.locals[idx].ty),
392            Err(_) => bail!("Local index out of range."),
393        }
394    }
395
396    /// Get a global together with its mutability.
397    fn get_global(&self, idx: GlobalIndex) -> ValidateResult<(ValueType, bool)> {
398        if let Some(global) = self.globals.get(idx as usize) {
399            Ok((global.init.ty(), global.mutable))
400        } else {
401            bail!("Global index out of range.")
402        }
403    }
404
405    fn memory_exists(&self) -> bool { self.memory }
406
407    fn table_exists(&self) -> bool { self.table }
408
409    fn get_func(&self, idx: FuncIndex) -> ValidateResult<&Rc<FunctionType>> {
410        if let Some(&type_idx) = self.funcs.get(idx as usize) {
411            self.get_type(type_idx)
412        } else {
413            bail!("Function index out of range.")
414        }
415    }
416
417    fn get_type(&self, idx: TypeIndex) -> ValidateResult<&Rc<FunctionType>> {
418        self.types.get(idx as usize).ok_or_else(|| anyhow!("Type index out of range."))
419    }
420
421    fn return_type(&self) -> BlockType { self.return_type }
422}
423
/// The width of a memory access. Only used to select the maximum alignment
/// that is allowed for the access.
enum Type {
    /// An 8-bit access.
    I8,
    /// A 16-bit access.
    I16,
    /// A 32-bit access.
    I32,
    /// A 64-bit access.
    I64,
}
431
432/// Ensure that the alignment is valid for the given type.
433fn ensure_alignment(num: u32, align: Type) -> ValidateResult<()> {
434    match align {
435        Type::I8 => {
436            ensure!(num == 0, "Type I8 alignment must be less than 0, but is {}.", num);
437        }
438        Type::I16 => {
439            ensure!(num <= 1, "Type I16 alignment must be less than 1, but is {}.", num);
440        }
441        Type::I32 => {
442            ensure!(num <= 2, "Type I32 alignment must be less than 2, but is {}", num);
443        }
444        Type::I64 => {
445            ensure!(num <= 3, "Type I64 alignment must be less than 3, but is {}.", num);
446        }
447    }
448    Ok(())
449}
450
/// How reachable the code is at the point validation has got to, given the
/// instructions processed so far.
#[derive(Clone, Copy, Debug)]
pub enum Reachability {
    /// The code at this point is reachable.
    Reachable,
    /// The enclosing frame is reachable, but the instruction itself is not.
    UnreachableInstruction,
    /// The enclosing frame as a whole is not reachable.
    UnreachableFrame,
}
462
463/// Trait to handle the results of validation.
464/// The type parameter should be instantiated with an opcode. The reason it is a
465/// type parameter is to support both opcodes and references to opcodes as
466/// parameters. The latter is useful because opcodes are not copyable.
467pub trait Handler<Ctx: HasValidationContext, O> {
468    type Outcome: Sized;
469
470    /// Handle the opcode. This function is called __after__ the `validate`
471    /// function itself processes the opcode. Hence the validation state is
472    /// already updated.
473    fn handle_opcode(
474        &mut self,
475        ctx: &Ctx,
476        state: &ValidationState,
477        reachability: Reachability,
478        opcode: O,
479    ) -> anyhow::Result<()>;
480
481    /// Finish processing the code. This function is called after the code body
482    /// has been successfully validated.
483    fn finish(self, state: &ValidationState) -> anyhow::Result<Self::Outcome>;
484}
485
486/// A [`Handler`] that is used during validation of a pure Wasm module.
487/// The [`Default`] instance creates an empty handler.
488#[derive(Default)]
489pub struct PureWasmModuleHandler {
490    pub(crate) instr: Vec<OpCode>,
491}
492
493impl<Ctx: HasValidationContext> Handler<Ctx, OpCode> for PureWasmModuleHandler {
494    type Outcome = (Self, usize);
495
496    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
497    fn handle_opcode(
498        &mut self,
499        _ctx: &Ctx,
500        _state: &ValidationState,
501        _reachability: Reachability,
502        opcode: OpCode,
503    ) -> anyhow::Result<()> {
504        anyhow::ensure!(!matches!(opcode, OpCode::TickEnergy(_)));
505        self.instr.push(opcode);
506        Ok(())
507    }
508
509    #[cfg_attr(not(feature = "fuzz-coverage"), inline(always))]
510    fn finish(self, state: &ValidationState) -> anyhow::Result<Self::Outcome> {
511        Ok((self, state.max_reachable_height))
512    }
513}
514
515/// Validate a single function. In order that this function is as flexible as
516/// possible it takes as input just an iterator over opcodes. The function will
517/// terminate at the first opcode it fails to read. Validation will ensure that
518/// the iterator is fully consumed and properly terminated by an `End` opcode.
519/// The return value is the outcome determined by the handler, as well as
520/// the maximum reachable stack height in this function.
521pub fn validate<O: Borrow<OpCode>, Ctx: HasValidationContext, H: Handler<Ctx, O>>(
522    context: &Ctx,
523    opcodes: impl Iterator<Item = ParseResult<O>>,
524    mut handler: H,
525) -> ValidateResult<H::Outcome> {
526    let mut state = ValidationState {
527        opds:                 OperandStack::default(),
528        ctrls:                ControlStack::default(),
529        max_reachable_height: 0,
530        unreachable_section:  None,
531    };
532    state.push_ctrl(false, context.return_type(), context.return_type());
533    for opcode in opcodes {
534        let next_opcode = opcode?;
535        let unreachable_before = state.reachability();
536        match next_opcode.borrow() {
537            OpCode::TickEnergy(_) => {
538                // Do nothing, this does not affect the stack, it acts as a
539                // function of type () => ().
540            }
541            OpCode::End => {
542                let (res, is_if) = state.pop_ctrl()?;
543                if is_if {
544                    ensure!(
545                        res == BlockType::EmptyType,
546                        "If without an else must have empty return type"
547                    )
548                }
549                state.push_opds(res);
550            }
551            OpCode::Nop => {
552                // do nothing.
553            }
554            OpCode::Unreachable => {
555                state.mark_unreachable()?;
556            }
557            OpCode::Block(ty) => {
558                state.push_ctrl(false, *ty, *ty);
559            }
560            OpCode::Loop(ty) => {
561                state.push_ctrl(false, BlockType::EmptyType, *ty);
562            }
563            OpCode::If {
564                ty,
565            } => {
566                state.pop_expect_opd(Known(ValueType::I32))?;
567                state.push_ctrl(true, *ty, *ty);
568            }
569            OpCode::Else => {
570                let (res, is_if) = state.pop_ctrl()?;
571                ensure!(is_if, "Else can only come after an if");
572                state.push_ctrl(false, res, res);
573            }
574            OpCode::Br(label) => {
575                if let Some(label_type) = state.ctrls.get_label(*label) {
576                    state.pop_opds(label_type)?;
577                    state.mark_unreachable()?;
578                } else {
579                    bail!("Jump to a non-existent label.")
580                }
581            }
582            OpCode::BrIf(label) => {
583                if let Some(label_type) = state.ctrls.get_label(*label) {
584                    state.pop_expect_opd(Known(ValueType::I32))?;
585                    state.pop_opds(label_type)?;
586                    state.push_opds(label_type);
587                } else {
588                    bail!("Conditional jump to non-existent label.")
589                }
590            }
591            OpCode::BrTable {
592                labels,
593                default,
594            } => {
595                ensure!(
596                    labels.len() <= MAX_SWITCH_SIZE,
597                    "Size of switch statement exceeds maximum."
598                );
599                if let Some(default_label_type) = state.ctrls.get_label(*default) {
600                    for &label in labels.iter() {
601                        if let Some(target_frame) = state.ctrls.get(label) {
602                            ensure!(
603                                default_label_type == target_frame.label_type,
604                                "Different targets have different label types."
605                            );
606                        } else {
607                            bail!("Table jump to non-existent label.")
608                        }
609                    }
610                    state.pop_expect_opd(Known(ValueType::I32))?;
611                    state.pop_opds(default_label_type)?;
612                    state.mark_unreachable()?;
613                } else {
614                    bail!("Table jump to non-existent label.")
615                }
616            }
617            OpCode::Return => {
618                if let Some(label_type) = state.ctrls.outermost().map(|frame| frame.label_type) {
619                    state.pop_opds(label_type)?;
620                    state.mark_unreachable()?;
621                }
622            }
623            OpCode::Call(idx) => {
624                let func = context.get_func(*idx)?;
625                for &ty in func.parameters.iter().rev() {
626                    state.pop_expect_opd(Known(ty))?;
627                }
628                for &ty in func.result.iter() {
629                    state.push_opd(Known(ty))
630                }
631            }
632            OpCode::CallIndirect(idx) => {
633                ensure!(context.table_exists(), "Table with index 0 must exist.");
634                // the table type is valid by construction, there is only one.
635                let func = context.get_type(*idx)?;
636                state.pop_expect_opd(Known(ValueType::I32))?;
637                for &ty in func.parameters.iter().rev() {
638                    state.pop_expect_opd(Known(ty))?;
639                }
640                for &ty in func.result.iter() {
641                    state.push_opd(Known(ty))
642                }
643            }
644            OpCode::Drop => {
645                state.pop_opd()?;
646            }
647            OpCode::Select => {
648                state.pop_expect_opd(Known(ValueType::I32))?;
649                let t1 = state.pop_opd()?;
650                let t2 = state.pop_expect_opd(t1)?;
651                state.push_opd(t2);
652            }
653            OpCode::LocalGet(idx) => {
654                let ty = context.get_local(*idx)?;
655                state.push_opd(Known(ty));
656            }
657            OpCode::LocalSet(idx) => {
658                let ty = context.get_local(*idx)?;
659                state.pop_expect_opd(Known(ty))?;
660            }
661            OpCode::LocalTee(idx) => {
662                let ty = context.get_local(*idx)?;
663                let stack_ty = state.pop_expect_opd(Known(ty))?;
664                state.push_opd(stack_ty);
665            }
666            OpCode::GlobalGet(idx) => {
667                let ty = context.get_global(*idx)?.0;
668                state.push_opd(Known(ty));
669            }
670            OpCode::GlobalSet(idx) => {
671                let (ty, mutable) = context.get_global(*idx)?;
672                ensure!(mutable, "Trying to set a const global.");
673                state.pop_expect_opd(Known(ty))?;
674            }
675            OpCode::I32Load(memarg) => {
676                ensure!(context.memory_exists(), "Memory should exist.");
677                ensure_alignment(memarg.align, Type::I32)?;
678                state.pop_expect_opd(Known(ValueType::I32))?;
679                state.push_opd(Known(ValueType::I32));
680            }
681            OpCode::I64Load(memarg) => {
682                ensure!(context.memory_exists(), "Memory should exist.");
683                ensure_alignment(memarg.align, Type::I64)?;
684                state.pop_expect_opd(Known(ValueType::I32))?;
685                state.push_opd(Known(ValueType::I64));
686            }
687            OpCode::I32Load8S(memarg) => {
688                ensure!(context.memory_exists(), "Memory should exist.");
689                ensure_alignment(memarg.align, Type::I8)?;
690                state.pop_expect_opd(Known(ValueType::I32))?;
691                state.push_opd(Known(ValueType::I32));
692            }
693            OpCode::I32Load8U(memarg) => {
694                ensure!(context.memory_exists(), "Memory should exist.");
695                ensure_alignment(memarg.align, Type::I8)?;
696                state.pop_expect_opd(Known(ValueType::I32))?;
697                state.push_opd(Known(ValueType::I32));
698            }
699            OpCode::I32Load16S(memarg) => {
700                ensure!(context.memory_exists(), "Memory should exist.");
701                ensure_alignment(memarg.align, Type::I16)?;
702                state.pop_expect_opd(Known(ValueType::I32))?;
703                state.push_opd(Known(ValueType::I32));
704            }
705            OpCode::I32Load16U(memarg) => {
706                ensure!(context.memory_exists(), "Memory should exist.");
707                ensure_alignment(memarg.align, Type::I16)?;
708                state.pop_expect_opd(Known(ValueType::I32))?;
709                state.push_opd(Known(ValueType::I32));
710            }
711            OpCode::I64Load8S(memarg) => {
712                ensure!(context.memory_exists(), "Memory should exist.");
713                ensure_alignment(memarg.align, Type::I8)?;
714                state.pop_expect_opd(Known(ValueType::I32))?;
715                state.push_opd(Known(ValueType::I64));
716            }
717            OpCode::I64Load8U(memarg) => {
718                ensure!(context.memory_exists(), "Memory should exist.");
719                ensure_alignment(memarg.align, Type::I8)?;
720                ensure!(memarg.align == 0, "Alignment out of range");
721                state.pop_expect_opd(Known(ValueType::I32))?;
722                state.push_opd(Known(ValueType::I64));
723            }
724            OpCode::I64Load16S(memarg) => {
725                ensure!(context.memory_exists(), "Memory should exist.");
726                ensure_alignment(memarg.align, Type::I16)?;
727                state.pop_expect_opd(Known(ValueType::I32))?;
728                state.push_opd(Known(ValueType::I64));
729            }
730            OpCode::I64Load16U(memarg) => {
731                ensure!(context.memory_exists(), "Memory should exist.");
732                ensure_alignment(memarg.align, Type::I16)?;
733                state.pop_expect_opd(Known(ValueType::I32))?;
734                state.push_opd(Known(ValueType::I64));
735            }
736            OpCode::I64Load32S(memarg) => {
737                ensure!(context.memory_exists(), "Memory should exist.");
738                ensure_alignment(memarg.align, Type::I32)?;
739                state.pop_expect_opd(Known(ValueType::I32))?;
740                state.push_opd(Known(ValueType::I64));
741            }
742            OpCode::I64Load32U(memarg) => {
743                ensure!(context.memory_exists(), "Memory should exist.");
744                ensure_alignment(memarg.align, Type::I32)?;
745                state.pop_expect_opd(Known(ValueType::I32))?;
746                state.push_opd(Known(ValueType::I64));
747            }
748            OpCode::I32Store(memarg) => {
749                ensure!(context.memory_exists(), "Memory should exist.");
750                ensure_alignment(memarg.align, Type::I32)?;
751                state.pop_expect_opd(Known(ValueType::I32))?;
752                state.pop_expect_opd(Known(ValueType::I32))?;
753            }
754            OpCode::I64Store(memarg) => {
755                ensure!(context.memory_exists(), "Memory should exist.");
756                ensure_alignment(memarg.align, Type::I64)?;
757                state.pop_expect_opd(Known(ValueType::I64))?;
758                state.pop_expect_opd(Known(ValueType::I32))?;
759            }
760            OpCode::I32Store8(memarg) => {
761                ensure!(context.memory_exists(), "Memory should exist.");
762                ensure_alignment(memarg.align, Type::I8)?;
763                state.pop_expect_opd(Known(ValueType::I32))?;
764                state.pop_expect_opd(Known(ValueType::I32))?;
765            }
766            OpCode::I32Store16(memarg) => {
767                ensure!(context.memory_exists(), "Memory should exist.");
768                ensure_alignment(memarg.align, Type::I16)?;
769                state.pop_expect_opd(Known(ValueType::I32))?;
770                state.pop_expect_opd(Known(ValueType::I32))?;
771            }
772            OpCode::I64Store8(memarg) => {
773                ensure!(context.memory_exists(), "Memory should exist.");
774                ensure_alignment(memarg.align, Type::I8)?;
775                state.pop_expect_opd(Known(ValueType::I64))?;
776                state.pop_expect_opd(Known(ValueType::I32))?;
777            }
778            OpCode::I64Store16(memarg) => {
779                ensure!(context.memory_exists(), "Memory should exist.");
780                ensure_alignment(memarg.align, Type::I16)?;
781                state.pop_expect_opd(Known(ValueType::I64))?;
782                state.pop_expect_opd(Known(ValueType::I32))?;
783            }
784            OpCode::I64Store32(memarg) => {
785                ensure!(context.memory_exists(), "Memory should exist.");
786                ensure_alignment(memarg.align, Type::I32)?;
787                state.pop_expect_opd(Known(ValueType::I64))?;
788                state.pop_expect_opd(Known(ValueType::I32))?;
789            }
790            OpCode::MemorySize => {
791                ensure!(context.memory_exists(), "Memory should exist.");
792                state.push_opd(Known(ValueType::I32))
793            }
794            OpCode::MemoryGrow => {
795                ensure!(context.memory_exists(), "Memory should exist.");
796                state.pop_expect_opd(Known(ValueType::I32))?;
797                state.push_opd(Known(ValueType::I32))
798            }
799            OpCode::I32Const(_) => {
800                state.push_opd(Known(ValueType::I32));
801            }
802            OpCode::I64Const(_) => {
803                state.push_opd(Known(ValueType::I64));
804            }
805            OpCode::I32Eqz => {
806                state.pop_expect_opd(Known(ValueType::I32))?;
807                state.push_opd(Known(ValueType::I32));
808            }
809            OpCode::I32Eq
810            | OpCode::I32Ne
811            | OpCode::I32LtS
812            | OpCode::I32LtU
813            | OpCode::I32GtS
814            | OpCode::I32GtU
815            | OpCode::I32LeS
816            | OpCode::I32LeU
817            | OpCode::I32GeS
818            | OpCode::I32GeU => {
819                state.pop_expect_opd(Known(ValueType::I32))?;
820                state.pop_expect_opd(Known(ValueType::I32))?;
821                state.push_opd(Known(ValueType::I32));
822            }
823            OpCode::I64Eqz => {
824                state.pop_expect_opd(Known(ValueType::I64))?;
825                state.push_opd(Known(ValueType::I32));
826            }
827            OpCode::I64Eq
828            | OpCode::I64Ne
829            | OpCode::I64LtS
830            | OpCode::I64LtU
831            | OpCode::I64GtS
832            | OpCode::I64GtU
833            | OpCode::I64LeS
834            | OpCode::I64LeU
835            | OpCode::I64GeS
836            | OpCode::I64GeU => {
837                state.pop_expect_opd(Known(ValueType::I64))?;
838                state.pop_expect_opd(Known(ValueType::I64))?;
839                state.push_opd(Known(ValueType::I32));
840            }
841            OpCode::I32Clz | OpCode::I32Ctz | OpCode::I32Popcnt => {
842                state.pop_expect_opd(Known(ValueType::I32))?;
843                state.push_opd(Known(ValueType::I32));
844            }
845            OpCode::I32Add
846            | OpCode::I32Sub
847            | OpCode::I32Mul
848            | OpCode::I32DivS
849            | OpCode::I32DivU
850            | OpCode::I32RemS
851            | OpCode::I32RemU
852            | OpCode::I32And
853            | OpCode::I32Or
854            | OpCode::I32Xor
855            | OpCode::I32Shl
856            | OpCode::I32ShrS
857            | OpCode::I32ShrU
858            | OpCode::I32Rotl
859            | OpCode::I32Rotr => {
860                state.pop_expect_opd(Known(ValueType::I32))?;
861                state.pop_expect_opd(Known(ValueType::I32))?;
862                state.push_opd(Known(ValueType::I32));
863            }
864            OpCode::I64Clz | OpCode::I64Ctz | OpCode::I64Popcnt => {
865                state.pop_expect_opd(Known(ValueType::I64))?;
866                state.push_opd(Known(ValueType::I64));
867            }
868            OpCode::I64Add
869            | OpCode::I64Sub
870            | OpCode::I64Mul
871            | OpCode::I64DivS
872            | OpCode::I64DivU
873            | OpCode::I64RemS
874            | OpCode::I64RemU
875            | OpCode::I64And
876            | OpCode::I64Or
877            | OpCode::I64Xor
878            | OpCode::I64Shl
879            | OpCode::I64ShrS
880            | OpCode::I64ShrU
881            | OpCode::I64Rotl
882            | OpCode::I64Rotr => {
883                state.pop_expect_opd(Known(ValueType::I64))?;
884                state.pop_expect_opd(Known(ValueType::I64))?;
885                state.push_opd(Known(ValueType::I64));
886            }
887            OpCode::I32WrapI64 => {
888                state.pop_expect_opd(Known(ValueType::I64))?;
889                state.push_opd(Known(ValueType::I32));
890            }
891            OpCode::I64ExtendI32S | OpCode::I64ExtendI32U => {
892                state.pop_expect_opd(Known(ValueType::I32))?;
893                state.push_opd(Known(ValueType::I64));
894            }
895            OpCode::I32Extend8S | OpCode::I32Extend16S => {
896                state.pop_expect_opd(Known(ValueType::I32))?;
897                state.push_opd(Known(ValueType::I32));
898            }
899            OpCode::I64Extend8S | OpCode::I64Extend16S | OpCode::I64Extend32S => {
900                state.pop_expect_opd(Known(ValueType::I64))?;
901                state.push_opd(Known(ValueType::I64));
902            }
903        }
904        handler.handle_opcode(context, &state, unreachable_before, next_opcode)?;
905    }
906    if state.done() {
907        handler.finish(&state)
908    } else {
909        bail!("Improperly terminated instruction sequence.")
910    }
911}
912
/// Validate imports and exports according to the specific logic of the host.
///
/// Module validation consults the implementation for every imported function
/// and every exported function. A `false` result from either method makes
/// validation of the module fail.
pub trait ValidateImportExport {
    /// Validate an imported function signature.
    ///
    /// The `duplicate` argument indicates whether an import with the same
    /// module name and item name was already seen earlier in the import
    /// section. `mod_name` and `item_name` are the two parts of the import
    /// name, and `ty` is the type of the imported function.
    ///
    /// Returns `true` if the import is allowed.
    fn validate_import_function(
        &self,
        duplicate: bool,
        mod_name: &Name,
        item_name: &Name,
        ty: &FunctionType,
    ) -> bool;

    /// Validate an exported function.
    ///
    /// `item_name` is the name under which the function is exported and `ty`
    /// is the type of the exported function.
    ///
    /// Returns `true` if the export is allowed.
    fn validate_export_function(&self, item_name: &Name, ty: &FunctionType) -> bool;
}
929
/// Settings that control module validation.
/// They select which Wasm features are accepted, which depends on the
/// protocol version.
///
/// Features that no protocol version supports are not configurable and are
/// therefore not listed here. Examples are imported globals, floating point
/// types, floating point operations, etc.
#[derive(Clone, Copy, Debug)]
pub struct ValidationConfig {
    /// Whether constant expressions initializing data and element sections may
    /// refer to locally defined globals. Protocols 1-5 allowed this, but
    /// later protocols have to reject it because the Wasm spec was updated to
    /// disallow it. See [issue](https://github.com/WebAssembly/spec/issues/1522) on the Wasm spec repository.
    pub allow_globals_in_init:      bool,
    /// Whether sign extension instructions are accepted. See [proposal](https://github.com/WebAssembly/sign-extension-ops/blob/master/proposals/sign-extension-ops/Overview.md).
    pub allow_sign_extension_instr: bool,
}

impl ValidationConfig {
    /// The configuration that applies to protocols 1-5: globals are allowed in
    /// initialization expressions, sign extension instructions are not.
    pub const V0: ValidationConfig = ValidationConfig {
        allow_globals_in_init:      true,
        allow_sign_extension_instr: false,
    };
    /// The configuration that applies to protocol 6 and onward: sign extension
    /// instructions are allowed, globals in initialization expressions are
    /// not.
    pub const V1: ValidationConfig = ValidationConfig {
        allow_globals_in_init:      false,
        allow_sign_extension_instr: true,
    };
}
960
/// Validate the module. This function parses and validates the module at the
/// same time, failing at the first encountered error.
///
/// The sections of the `skeleton` are processed in the following order:
/// custom, type, import, table, memory, global, start, function, code,
/// export, element, and data. Imported and exported functions are
/// additionally checked by `imp`. The `config` determines whether sign
/// extension instructions are accepted and whether globals may be used in
/// the initialization expressions of the element and data sections.
///
/// On success the parsed and validated [`Module`] is returned.
pub fn validate_module(
    config: ValidationConfig,
    imp: &impl ValidateImportExport,
    skeleton: &Skeleton<'_>,
) -> ValidateResult<Module> {
    // This is a technicality, but we need to parse the custom sections to ensure
    // that they are valid. Validity consists only of checking that the name part
    // is properly encoded.
    for cs in skeleton.custom.iter() {
        parse_custom(cs)?;
    }

    // The type section is valid as long as it's well-formed.
    let ty: TypeSection = parse_sec_with_default(EMPTY_CTX, &skeleton.ty)?;
    // Imports are valid as long as they parse, and all the indices exist.
    let import: ImportSection = parse_sec_with_default(EMPTY_CTX, &skeleton.import)?;
    {
        // Names of imports seen so far, used to tell the host-specific validation
        // whether an import is a duplicate.
        let mut seen_imports = BTreeSet::new();
        for i in import.imports.iter() {
            match i.description {
                ImportDescription::Func {
                    type_idx,
                } => {
                    if let Some(ty) = ty.get(type_idx) {
                        let is_new = seen_imports.insert((&i.mod_name, &i.item_name));
                        ensure!(
                            imp.validate_import_function(!is_new, &i.mod_name, &i.item_name, ty),
                            "Disallowed import."
                        );
                    } else {
                        bail!("Import refers to a non-existent type.");
                    }
                }
            }
        }
    }
    // The table section is valid as long as it's well-formed.
    // We already check the limits at parse time.
    let table: TableSection = parse_sec_with_default(EMPTY_CTX, &skeleton.table)?;

    // The memory section is valid as long as it's well-formed.
    // We already check the limits at parse time.
    let memory: MemorySection = parse_sec_with_default(EMPTY_CTX, &skeleton.memory)?;

    // The global section is valid as long as it's well-formed.
    // We already check that all the globals are initialized with
    // correct expressions.
    let global: GlobalSection = parse_sec_with_default(config, &skeleton.global)?;
    ensure!(
        global.globals.len() <= MAX_NUM_GLOBALS,
        "The number of globals must not exceed {}.",
        MAX_NUM_GLOBALS
    );

    // The start section is valid as long as it parses correctly.
    // We make sure that there is no content in the start section during parsing.
    let start = parse_sec_with_default(EMPTY_CTX, &skeleton.start)?;

    // The function type section is valid if it parses properly, and all the indices
    // of types are valid.
    // The code section then needs to match.
    let func: FunctionSection = parse_sec_with_default(EMPTY_CTX, &skeleton.func)?;
    for &type_idx in func.types.iter() {
        ensure!(ty.get(type_idx).is_some(), "Function refers to a type that does not exist.")
    }

    // Number of functions that can be referred to.
    // Since all imports must be functions we could just use length, but
    // in the interest of being more robust to changes we count imported functions
    // instead.
    let total_funcs =
        import.imports.iter().filter(|&x| Import::is_func(x)).count() + func.types.len();

    let code: CodeSkeletonSection = parse_sec_with_default(EMPTY_CTX, &skeleton.code)?;
    ensure!(
        func.types.len() == code.impls.len(),
        "The number of functions in the function and code sections must match."
    );
    // an index of function types, merging imported and declared functions.
    // Imported functions come first, followed by the locally declared ones.
    let funcs = import
        .imports
        .iter()
        .map(|i| match i.description {
            ImportDescription::Func {
                type_idx,
            } => type_idx,
        })
        .chain(func.types.iter().copied())
        .collect::<Vec<TypeIndex>>();

    // Validate the body of each declared function against its declared type.
    let mut parsed_code = Vec::with_capacity(code.impls.len());
    for (&f, c) in func.types.iter().zip(code.impls) {
        match ty.get(f) {
            Some(func_ty) => {
                let (locals, num_locals) = make_locals(func_ty, &c.locals)?;
                let ctx = FunctionContext {
                    return_type: BlockType::from(func_ty.result),
                    globals: &global.globals,
                    funcs: &funcs,
                    types: &ty.types,
                    locals,
                    memory: memory.memory_type.is_some(),
                    table: table.table_type.is_some(),
                };
                let (opcodes, max_height) = validate(
                    &ctx,
                    &mut OpCodeIterator::new(config.allow_sign_extension_instr, c.expr_bytes),
                    PureWasmModuleHandler::default(),
                )?;
                // The locals together with the maximum operand stack height reported
                // by validation must fit within the allowed stack height.
                ensure!(
                    num_locals as usize + max_height <= MAX_ALLOWED_STACK_HEIGHT,
                    "Stack height would exceed allowed limits."
                );

                let code = Code {
                    ty: func_ty.clone(),
                    ty_idx: f,
                    num_locals,
                    locals: c.locals,
                    expr: Expression {
                        instrs: opcodes.instr,
                    },
                };
                parsed_code.push(code)
            }
            None => bail!("Function has a type that does not exist."),
        }
    }
    // Exports are mostly valid by parsing, but we need to make sure that
    // they are all distinct.
    let export: ExportSection = parse_sec_with_default(EMPTY_CTX, &skeleton.export)?;
    let mut export_names = BTreeSet::new();
    ensure!(export.exports.len() <= MAX_NUM_EXPORTS, "Module exceeds maximum number of exports.");
    for e in export.exports.iter() {
        // ensure the name is unique.
        ensure!(export_names.insert(&e.name), "Duplicate exports {}.", e.name);

        // Every exported item must exist, and exported functions are additionally
        // subject to the host-specific validation.
        match e.description {
            ExportDescription::Func {
                index,
            } => {
                if let Some(ty) = funcs.get(index as usize).and_then(|ty_idx| ty.get(*ty_idx)) {
                    ensure!(imp.validate_export_function(&e.name, ty), "Export function not valid.")
                } else {
                    bail!("Trying to export a function that does not exist.")
                }
            }
            ExportDescription::Table => {
                ensure!(
                    table.table_type.is_some(),
                    "Trying to export a table, but no table is declared."
                );
            }
            ExportDescription::Memory => {
                ensure!(
                    memory.memory_type.is_some(),
                    "Trying to export a memory, but no memory is declared."
                );
            }
            ExportDescription::Global {
                index,
            } => {
                ensure!(
                    global.get(index).is_some(),
                    "Trying to export a global that does not exist."
                );
            }
        }
    }

    // The element section is almost well-formed by parsing.
    // Parsing already ensures that limits are well-formed, that
    // the offset expression is of the correct type and constant.
    // We additionally need to check that all the functions referred
    // to in the table are defined.
    // The same context is used below for parsing the data section.
    let instr_validation_ctx = InstructionValidationContext {
        globals_allowed:            if config.allow_globals_in_init {
            Some(&global)
        } else {
            None
        },
        allow_sign_extension_instr: config.allow_sign_extension_instr,
    };
    let element: ElementSection = parse_sec_with_default(instr_validation_ctx, &skeleton.element)?;
    ensure!(
        element.elements.is_empty() || table.table_type.is_some(),
        "There is an elements section, but no table."
    );
    for elem in element.elements.iter() {
        let inits_len: u32 = elem.inits.len().try_into()?;
        ensure!(
            inits_len <= MAX_INIT_TABLE_SIZE,
            "Number of initial elements is more than the table size."
        );
        if let Some(table_type) = table.table_type.as_ref() {
            let offset = elem.offset as u32;
            // since we provide no way to grow the table the initial minimum size
            // is the size of the table, as specified in the allocation section of the
            // Wasm semantics.
            // NOTE(review): the original comment was cut off here ("The as u32 is
            // safe beca"). Presumably the cast is safe because an offset that
            // wraps to a large value is rejected by the overflow and bound checks
            // below. Confirm against the declared type of `elem.offset`.
            let end = offset
                .checked_add(inits_len)
                .ok_or_else(|| anyhow!("The end of the table exceeds u32 max bound."))?;
            ensure!(
                end <= table_type.limits.min,
                "Initialization expression for the table exceeds table size {} > {}.",
                end,
                table_type.limits.min
            );
        }
        for &init in elem.inits.iter() {
            ensure!(
                (init as usize) < total_funcs,
                "Index in the element segment refers to a non-existent function."
            );
        }
    }

    // The data section is almost well-formed by parsing.
    // Parsing already ensures that limits are well-formed, that
    // the offset expression is of the correct type and constant.
    // We additionally need to check that all the locations referred
    // to by the data segments are within the initial memory.
    let data: DataSection = parse_sec_with_default(instr_validation_ctx, &skeleton.data)?;
    // Make sure that if there are any data segments then a memory exists.
    // By parsing we already ensure that all the references are to a single memory
    // and that the initial memory is limited by MAX_INIT_MEMORY_SIZE.
    if let Some(memory_type) = memory.memory_type.as_ref() {
        for data in data.sections.iter() {
            let inits_len: u32 = data.init.len().try_into()?;
            ensure!(
                // this cannot overflow because we've already ensured limits.min <
                // MAX_INIT_MEMORY_SIZE
                inits_len <= memory_type.limits.min * PAGE_SIZE,
                "Number of initial elements is more than the initial memory size."
            );
            let offset: u32 = data.offset.try_into()?;
            let end = offset
                .checked_add(inits_len)
                .ok_or_else(|| anyhow!("The end of the memory exceeds u32 max bound."))?;
            ensure!(
                // by validation we have that memory_type.limits.min <= MAX_INIT_MEMORY_SIZE <
                // 2^16, so this cannot overflow but we're still being safe
                memory_type.limits.min.checked_mul(PAGE_SIZE).map_or(false, |l| end <= l),
                "Initialization expression for the data segment exceeds initial memory size {} > \
                 {}.",
                end,
                memory_type.limits.min * PAGE_SIZE
            );
        }
    } else {
        // There is no memory, so there should be no data section.
        ensure!(data.sections.is_empty(), "There are data sections, but no declared memory.");
    }
    // All sections are valid. The code section of the result contains the
    // validated function bodies.
    Ok(Module {
        ty,
        import,
        func,
        table,
        memory,
        global,
        export,
        start,
        element,
        code: CodeSection {
            impls: parsed_code,
        },
        data,
    })
}