1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
use super::{
    CustomSectionsBuilder,
    ModuleBuilder,
    ModuleHeader,
    ModuleHeaderBuilder,
    ModuleParser,
};
use crate::{Error, Module, Read};
use core::ops::{Deref, DerefMut};
use std::vec::Vec;
use wasmparser::{Chunk, Payload, Validator};

/// A buffer for holding parsed payloads in bytes.
///
/// New bytes are appended at the back via [`ParseBuffer::pull_bytes`] and
/// bytes that are no longer needed are removed from the front via
/// [`ParseBuffer::consume`].
#[derive(Debug, Default, Clone)]
struct ParseBuffer {
    /// The bytes that have been pulled from the stream and not yet been consumed.
    buffer: Vec<u8>,
}

impl ParseBuffer {
    /// Removes the leading `amount` bytes from the [`ParseBuffer`] since they have been consumed.
    ///
    /// # Panics
    ///
    /// If `amount` is greater than the number of bytes currently held by the buffer.
    #[inline]
    fn consume(buffer: &mut Self, amount: usize) {
        let bytes: &mut Vec<u8> = buffer;
        bytes.drain(0..amount);
    }

    /// Reads up to `hint` additional bytes from the `stream` and appends them to the buffer.
    ///
    /// Returns `true` if the `stream` did not yield any more bytes, i.e. if its end was reached.
    ///
    /// # Note
    ///
    /// The `hint` determines by how many bytes the buffer is grown before reading.
    /// If the `stream` yields fewer bytes the buffer is shrunk again to hold only
    /// the bytes that were actually read.
    ///
    /// # Errors
    ///
    /// If reading from the `stream` fails.
    #[inline]
    fn pull_bytes(buffer: &mut Self, hint: u64, stream: &mut impl Read) -> Result<bool, Error> {
        let filled = buffer.len();
        // Zero-extend the buffer by `hint` bytes so that the stream has a place to write into.
        buffer.resize(filled + hint as usize, 0x0_u8);
        let received = stream.read(&mut buffer[filled..])?;
        // Cut off the part of the zeroed extension that the stream did not fill.
        buffer.truncate(filled + received);
        Ok(received == 0)
    }
}

// Dereferencing a `ParseBuffer` exposes the underlying byte vector so that
// `Vec<u8>` methods and slicing can be used on a `ParseBuffer` directly.
impl Deref for ParseBuffer {
    type Target = Vec<u8>;

    /// Returns a shared reference to the underlying byte vector.
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.buffer
    }
}

// Mutable dereferencing exposes the underlying byte vector so that mutating
// `Vec<u8>` methods can be used on a `ParseBuffer` directly.
impl DerefMut for ParseBuffer {
    /// Returns an exclusive reference to the underlying byte vector.
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.buffer
    }
}

impl ModuleParser {
    /// Parses the Wasm bytecode `stream` with Wasm validation enabled.
    ///
    /// A [`Validator`] configured with the Wasm features of the engine is
    /// installed before parsing starts.
    ///
    /// Returns the compiled and validated Wasm [`Module`] upon success.
    ///
    /// # Errors
    ///
    /// If the Wasm bytecode stream fails to parse or validate.
    pub fn parse_streaming(mut self, stream: impl Read) -> Result<Module, Error> {
        let validator = Validator::new_with_features(self.engine.config().wasm_features());
        self.validator = Some(validator);
        // SAFETY: the Wasm module parser has just been equipped with a validator
        //         which is one of the two ways to satisfy the safety contract
        //         of `parse_streaming_impl`.
        unsafe { self.parse_streaming_impl(stream) }
    }

    /// Parses the Wasm bytecode `stream` without Wasm validation.
    ///
    /// Returns the compiled Wasm [`Module`] upon success.
    ///
    /// # Note
    ///
    /// Unlike [`ModuleParser::parse_streaming`] this method does not install
    /// a [`Validator`] before parsing.
    ///
    /// # Safety
    ///
    /// The caller is responsible to make sure that the provided
    /// `stream` yields valid WebAssembly bytecode.
    ///
    /// # Errors
    ///
    /// If parsing the Wasm bytecode stream fails. Errors are forwarded
    /// unchanged from the underlying streaming parser routine.
    pub unsafe fn parse_streaming_unchecked(self, stream: impl Read) -> Result<Module, Error> {
        // SAFETY: the caller of this method guarantees that `stream` yields valid
        //         Wasm bytecode which is what `parse_streaming_impl` requires when
        //         no validator has been installed.
        unsafe { self.parse_streaming_impl(stream) }
    }

    /// Drives the three stages of parsing the Wasm bytecode stream.
    ///
    /// The stages run in this order and share a single [`ParseBuffer`]:
    ///
    /// 1. The Wasm module header, see `parse_streaming_header`.
    /// 2. The Wasm code section entries, see `parse_streaming_code`.
    /// 3. The Wasm data section and remaining payloads, see `parse_streaming_data`.
    ///
    /// Returns the compiled Wasm [`Module`] upon success.
    ///
    /// # Safety
    ///
    /// The caller is responsible to either
    ///
    /// 1) Populate the [`ModuleParser`] with a [`Validator`] prior to calling this method, OR;
    /// 2) Make sure that the provided `stream` yields valid WebAssembly bytecode.
    ///
    /// Otherwise this method has undefined behavior.
    ///
    /// # Errors
    ///
    /// If any of the parsing stages fails.
    unsafe fn parse_streaming_impl(mut self, mut stream: impl Read) -> Result<Module, Error> {
        let mut buffer = ParseBuffer::default();
        let mut custom_sections = CustomSectionsBuilder::default();
        let header = self.parse_streaming_header(&mut stream, &mut buffer, &mut custom_sections)?;
        let builder = self.parse_streaming_code(&mut stream, &mut buffer, header, custom_sections)?;
        self.parse_streaming_data(&mut stream, &mut buffer, builder)
    }

    /// Parses the Wasm module header from the `stream`.
    ///
    /// - The Wasm module header consists of all sections that appear before
    ///   the Wasm code section.
    /// - The header is parsed on its own because its information is required
    ///   for translating the Wasm code section.
    ///
    /// # Note
    ///
    /// Parsing of the header stops when
    ///
    /// - the `stream` is exhausted while the parser asks for more data,
    /// - the start of the code section is found, in which case its bytes are
    ///   dropped from the `buffer`,
    /// - a data section or the end of the module is found, in which case the
    ///   bytes of that payload are NOT dropped from the `buffer`.
    ///
    /// # Errors
    ///
    /// If the Wasm bytecode stream fails to parse or validate.
    fn parse_streaming_header(
        &mut self,
        stream: &mut impl Read,
        buffer: &mut ParseBuffer,
        custom_sections: &mut CustomSectionsBuilder,
    ) -> Result<ModuleHeader, Error> {
        let mut header = ModuleHeaderBuilder::new(&self.engine);
        loop {
            let (consumed, payload) = match self.parser.parse(&buffer[..], self.eof)? {
                Chunk::Parsed { consumed, payload } => (consumed, payload),
                Chunk::NeedMoreData(hint) => {
                    self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?;
                    if self.eof {
                        // The stream has no more bytes to offer: the header is complete.
                        break;
                    }
                    continue;
                }
            };
            let processed = match payload {
                Payload::Version {
                    num,
                    encoding,
                    range,
                } => self.process_version(num, encoding, range),
                Payload::TypeSection(section) => self.process_types(section, &mut header),
                Payload::ImportSection(section) => self.process_imports(section, &mut header),
                Payload::FunctionSection(section) => self.process_functions(section, &mut header),
                Payload::TableSection(section) => self.process_tables(section, &mut header),
                Payload::MemorySection(section) => self.process_memories(section, &mut header),
                Payload::GlobalSection(section) => self.process_globals(section, &mut header),
                Payload::ExportSection(section) => self.process_exports(section, &mut header),
                Payload::StartSection { func, range } => {
                    self.process_start(func, range, &mut header)
                }
                Payload::ElementSection(section) => self.process_element(section, &mut header),
                Payload::DataCountSection { count, range } => {
                    self.process_data_count(count, range)
                }
                Payload::CodeSectionStart { count, range, size } => {
                    // The code section begins: drop its start marker from the
                    // buffer and leave the entries to the code parsing stage.
                    self.process_code_start(count, range, size)?;
                    ParseBuffer::consume(buffer, consumed);
                    break;
                }
                // NOTE(review): the bytes of these payloads intentionally stay in the
                //               buffer, presumably so that the following parsing
                //               stages encounter them again - confirm.
                Payload::DataSection(_) | Payload::End(_) => break,
                Payload::CustomSection(reader) => {
                    self.process_custom_section(custom_sections, reader)
                }
                unexpected => self.process_invalid_payload(unexpected),
            };
            processed?;
            // The payload has been handled: drop its bytes from the intermediate buffer.
            ParseBuffer::consume(buffer, consumed);
        }
        Ok(header.finish())
    }

    /// Parses the entries of the Wasm code section from the `stream`.
    ///
    /// The code section is handled by its own routine since most of a Wasm module
    /// is made up of code section entries which can be parsed and validated more
    /// efficiently by a specialized loop.
    ///
    /// # Note
    ///
    /// Parsing stops at the first payload that is not a code section entry.
    /// The bytes of that payload are NOT dropped from the `buffer`.
    ///
    /// # Errors
    ///
    /// If the Wasm bytecode stream fails to parse or validate.
    fn parse_streaming_code(
        &mut self,
        stream: &mut impl Read,
        buffer: &mut ParseBuffer,
        header: ModuleHeader,
        custom_sections: CustomSectionsBuilder,
    ) -> Result<ModuleBuilder, Error> {
        loop {
            let (consumed, payload) = match self.parser.parse(&buffer[..], self.eof)? {
                Chunk::Parsed { consumed, payload } => (consumed, payload),
                Chunk::NeedMoreData(hint) => {
                    self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?;
                    continue;
                }
            };
            let func_body = match payload {
                Payload::CodeSectionEntry(func_body) => func_body,
                _ => break,
            };
            // Note: `wasmparser` does not offer an API to return the byte slice of a
            //       code section entry payload. As a work around the slice is taken
            //       from the tail of the bytes consumed for this payload. Remove this
            //       as soon as such an API becomes available.
            let body_len = func_body.get_binary_reader().bytes_remaining();
            let bytes = &buffer[consumed - body_len..consumed];
            self.process_code_entry(func_body, bytes, &header)?;
            // The entry has been handled: drop its bytes from the intermediate buffer.
            ParseBuffer::consume(buffer, consumed);
        }
        Ok(ModuleBuilder::new(header, custom_sections))
    }

    /// Parses the Wasm data section from the `stream` and finishes the module.
    ///
    /// The data section is handled by its own routine since it is the only Wasm
    /// section following the Wasm code section that has to be separated out
    /// for technical reasons.
    ///
    /// # Note
    ///
    /// Besides the data section this stage accepts custom sections and the
    /// end of the module which terminates parsing. Every other payload is
    /// forwarded to the handler for invalid payloads.
    ///
    /// # Errors
    ///
    /// If the Wasm bytecode stream fails to parse or validate.
    fn parse_streaming_data(
        &mut self,
        stream: &mut impl Read,
        buffer: &mut ParseBuffer,
        mut builder: ModuleBuilder,
    ) -> Result<Module, Error> {
        loop {
            let (consumed, payload) = match self.parser.parse(&buffer[..], self.eof)? {
                Chunk::Parsed { consumed, payload } => (consumed, payload),
                Chunk::NeedMoreData(hint) => {
                    self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?;
                    continue;
                }
            };
            let finished = match payload {
                Payload::DataSection(section) => {
                    self.process_data(section, &mut builder)?;
                    false
                }
                Payload::End(offset) => {
                    self.process_end(offset)?;
                    true
                }
                Payload::CustomSection(reader) => {
                    self.process_custom_section(&mut builder.custom_sections, reader)?;
                    false
                }
                invalid => {
                    self.process_invalid_payload(invalid)?;
                    false
                }
            };
            // The payload has been handled: drop its bytes from the intermediate buffer.
            ParseBuffer::consume(buffer, consumed);
            if finished {
                break;
            }
        }
        Ok(builder.finish(&self.engine))
    }
}