tinywasm_wasmparser/parser.rs
1use alloc::vec::Vec;
2
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::std::fmt;
5use crate::std::iter;
6use crate::std::ops::Range;
7use crate::CoreTypeSectionReader;
8use crate::{
9 limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
10 ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
11 ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
12 ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
13 GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
14 SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
15};
16
/// Version number expected in the header of a core WebAssembly module.
///
/// `Parser::is_core_wasm` looks for this value, little-endian, directly after
/// the magic number.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Version number expected in the header of a WebAssembly component.
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// The "kind" half of the header: the upper 16 bits of the 32-bit header
// version word, i.e. the two bytes following the version number. The parser
// uses it to choose between `Encoding::Module` and `Encoding::Component`.
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;
31
/// The supported encoding formats for the parser.
///
/// A freshly created [`Parser`] assumes [`Encoding::Module`]; the actual
/// encoding is determined from the header and reported through
/// [`Payload::Version`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,
}
40
/// An incremental parser of a binary WebAssembly module or component.
///
/// This type is intended to be used to incrementally parse a WebAssembly module
/// or component as bytes become available for the module. This can also be used
/// to parse modules or components that are already entirely resident within memory.
///
/// The primary function for a parser is the [`Parser::parse`] function which
/// will incrementally consume input. You can also use the [`Parser::parse_all`]
/// function to parse a module or component that is entirely resident in memory.
#[derive(Debug, Clone)]
pub struct Parser {
    // What the parser expects to read next (header, section start, or a
    // function body of a code section).
    state: State,
    // Logical position within the overall byte stream. Starts at the value
    // given to `Parser::new` and advances by the number of bytes consumed on
    // every successful `Parser::parse` call.
    offset: u64,
    // How many more bytes this parser is allowed to consume. `u64::MAX` for a
    // parser made by `Parser::new`; nested module/component parsers are limited
    // to the length of the section that contains them.
    max_size: u64,
    // Encoding detected from the header; `Encoding::Module` until the header
    // has been read.
    encoding: Encoding,
}
57
/// Internal state of a [`Parser`], describing what it expects to read next.
#[derive(Debug, Clone)]
enum State {
    /// Nothing has been parsed yet; the header comes next.
    Header,
    /// Positioned between sections: either a section id and length follow, or
    /// the input has ended.
    SectionStart,
    /// Inside a code section whose functions are handed out one at a time.
    ///
    /// `remaining` is the number of function bodies still to be yielded and
    /// `len` is the number of bytes of the code section not yet consumed.
    FunctionBody { remaining: u32, len: u32 },
}
64
/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    NeedMoreData(u64),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
    },
}
87
/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration is all possible chunks of pieces that can be parsed by a
/// [`Parser`] from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
    },

    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing piece-by-piece of a code section.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
    },
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),

    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        range: Range<usize>,
    },
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        range: Range<usize>,
    },
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
    },
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    ComponentExportSection(ComponentExportSectionReader<'a>),

    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),

    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
    },

    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),
}
299
// Section ids that appear in core WebAssembly modules. Id 0 (custom sections)
// is shared with components.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids that appear in WebAssembly components. These overlap numerically
// with the module ids above; the parser tells them apart by the `Encoding`
// detected from the header.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
326
327impl Parser {
328 /// Creates a new parser.
329 ///
330 /// Reports errors and ranges relative to `offset` provided, where `offset`
331 /// is some logical offset within the input stream that we're parsing.
332 pub fn new(offset: u64) -> Parser {
333 Parser {
334 state: State::Header,
335 offset,
336 max_size: u64::MAX,
337 // Assume the encoding is a module until we know otherwise
338 encoding: Encoding::Module,
339 }
340 }
341
342 /// Tests whether `bytes` looks like a core WebAssembly module.
343 ///
344 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
345 /// starts with the standard core WebAssembly header.
346 pub fn is_core_wasm(bytes: &[u8]) -> bool {
347 const HEADER: [u8; 8] = [
348 WASM_MAGIC_NUMBER[0],
349 WASM_MAGIC_NUMBER[1],
350 WASM_MAGIC_NUMBER[2],
351 WASM_MAGIC_NUMBER[3],
352 WASM_MODULE_VERSION.to_le_bytes()[0],
353 WASM_MODULE_VERSION.to_le_bytes()[1],
354 KIND_MODULE.to_le_bytes()[0],
355 KIND_MODULE.to_le_bytes()[1],
356 ];
357 bytes.starts_with(&HEADER)
358 }
359
360 /// Tests whether `bytes` looks like a WebAssembly component.
361 ///
362 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
363 /// starts with the standard WebAssembly component header.
364 pub fn is_component(bytes: &[u8]) -> bool {
365 const HEADER: [u8; 8] = [
366 WASM_MAGIC_NUMBER[0],
367 WASM_MAGIC_NUMBER[1],
368 WASM_MAGIC_NUMBER[2],
369 WASM_MAGIC_NUMBER[3],
370 WASM_COMPONENT_VERSION.to_le_bytes()[0],
371 WASM_COMPONENT_VERSION.to_le_bytes()[1],
372 KIND_COMPONENT.to_le_bytes()[0],
373 KIND_COMPONENT.to_le_bytes()[1],
374 ];
375 bytes.starts_with(&HEADER)
376 }
377
378 /// Attempts to parse a chunk of data.
379 ///
380 /// This method will attempt to parse the next incremental portion of a
381 /// WebAssembly binary. Data available for the module or component is
382 /// provided as `data`, and the data can be incomplete if more data has yet
383 /// to arrive. The `eof` flag indicates whether more data will ever be received.
384 ///
385 /// There are two ways parsing can succeed with this method:
386 ///
/// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
388 /// in `data` to parse a payload. The caller needs to wait for more data to
389 /// be available in this situation before calling this method again. It is
390 /// guaranteed that this is only returned if `eof` is `false`.
391 ///
392 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
393 /// successfully parsed. The payload is available in this variant of what
/// was parsed, and this also indicates how many bytes of `data` were
395 /// consumed. It's expected that the caller will not provide these bytes
396 /// back to the [`Parser`] again.
397 ///
398 /// Note that all `Chunk` return values are connected, with a lifetime, to
399 /// the input buffer. Each parsed chunk borrows the input buffer and is a
400 /// view into it for successfully parsed chunks.
401 ///
402 /// It is expected that you'll call this method until `Payload::End` is
403 /// reached, at which point you're guaranteed that the parse has completed.
404 /// Note that complete parsing, for the top-level module or component,
405 /// implies that `data` is empty and `eof` is `true`.
406 ///
407 /// # Errors
408 ///
409 /// Parse errors are returned as an `Err`. Errors can happen when the
410 /// structure of the data is unexpected or if sections are too large for
411 /// example. Note that errors are not returned for malformed *contents* of
412 /// sections here. Sections are generally not individually parsed and each
413 /// returned [`Payload`] needs to be iterated over further to detect all
414 /// errors.
415 ///
416 // /// # Examples
417 // ///
418 // /// An example of reading a wasm file from a stream (`std::io::Read`) and
419 // /// incrementally parsing it.
420 // ///
421 // /// ```
422 // /// use crate::std::io::Read;
423 // /// use anyhow::Result;
424 // /// use tinywasm_wasmparser::{Parser, Chunk, Payload::*};
425 // ///
426 // /// fn parse(mut reader: impl Read) -> Result<()> {
427 // /// let mut buf = Vec::new();
428 // /// let mut cur = Parser::new(0);
429 // /// let mut eof = false;
430 // /// let mut stack = Vec::new();
431 // ///
432 // /// loop {
433 // /// let (payload, consumed) = match cur.parse(&buf, eof)? {
434 // /// Chunk::NeedMoreData(hint) => {
435 // /// assert!(!eof); // otherwise an error would be returned
436 // ///
437 // /// // Use the hint to preallocate more space, then read
438 // /// // some more data into our buffer.
439 // /// //
440 // /// // Note that the buffer management here is not ideal,
441 // /// // but it's compact enough to fit in an example!
442 // /// let len = buf.len();
443 // /// buf.extend((0..hint).map(|_| 0u8));
444 // /// let n = reader.read(&mut buf[len..])?;
445 // /// buf.truncate(len + n);
446 // /// eof = n == 0;
447 // /// continue;
448 // /// }
449 // ///
450 // /// Chunk::Parsed { consumed, payload } => (payload, consumed),
451 // /// };
452 // ///
453 // /// match payload {
454 // /// // Sections for WebAssembly modules
455 // /// Version { .. } => { /* ... */ }
456 // /// TypeSection(_) => { /* ... */ }
457 // /// ImportSection(_) => { /* ... */ }
458 // /// FunctionSection(_) => { /* ... */ }
459 // /// TableSection(_) => { /* ... */ }
460 // /// MemorySection(_) => { /* ... */ }
461 // /// TagSection(_) => { /* ... */ }
462 // /// GlobalSection(_) => { /* ... */ }
463 // /// ExportSection(_) => { /* ... */ }
464 // /// StartSection { .. } => { /* ... */ }
465 // /// ElementSection(_) => { /* ... */ }
466 // /// DataCountSection { .. } => { /* ... */ }
467 // /// DataSection(_) => { /* ... */ }
468 // ///
469 // /// // Here we know how many functions we'll be receiving as
470 // /// // `CodeSectionEntry`, so we can prepare for that, and
471 // /// // afterwards we can parse and handle each function
472 // /// // individually.
473 // /// CodeSectionStart { .. } => { /* ... */ }
474 // /// CodeSectionEntry(body) => {
475 // /// // here we can iterate over `body` to parse the function
476 // /// // and its locals
477 // /// }
478 // ///
479 // /// // Sections for WebAssembly components
480 // /// InstanceSection(_) => { /* ... */ }
481 // /// CoreTypeSection(_) => { /* ... */ }
482 // /// ComponentInstanceSection(_) => { /* ... */ }
483 // /// ComponentAliasSection(_) => { /* ... */ }
484 // /// ComponentTypeSection(_) => { /* ... */ }
485 // /// ComponentCanonicalSection(_) => { /* ... */ }
486 // /// ComponentStartSection { .. } => { /* ... */ }
487 // /// ComponentImportSection(_) => { /* ... */ }
488 // /// ComponentExportSection(_) => { /* ... */ }
489 // ///
490 // /// ModuleSection { parser, .. }
491 // /// | ComponentSection { parser, .. } => {
492 // /// stack.push(cur.clone());
493 // /// cur = parser.clone();
494 // /// }
495 // ///
496 // /// CustomSection(_) => { /* ... */ }
497 // ///
498 // /// // most likely you'd return an error here
499 // /// UnknownSection { id, .. } => { /* ... */ }
500 // ///
501 // /// // Once we've reached the end of a parser we either resume
502 // /// // at the parent parser or we break out of the loop because
503 // /// // we're done.
504 // /// End(_) => {
505 // /// if let Some(parent_parser) = stack.pop() {
506 // /// cur = parent_parser;
507 // /// } else {
508 // /// break;
509 // /// }
510 // /// }
511 // /// }
512 // ///
513 // /// // once we're done processing the payload we can forget the
514 // /// // original.
515 // /// buf.drain(..consumed);
516 // /// }
517 // ///
518 // /// Ok(())
519 // /// }
520 // ///
521 // /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
522 // /// ```
523 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
524 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
525 (&data[..(self.max_size as usize)], true)
526 } else {
527 (data, eof)
528 };
529 // TODO: thread through `offset: u64` to `BinaryReader`, remove
530 // the cast here.
531 let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
532 match self.parse_reader(&mut reader, eof) {
533 Ok(payload) => {
534 // Be sure to update our offset with how far we got in the
535 // reader
536 self.offset += usize_to_u64(reader.position);
537 self.max_size -= usize_to_u64(reader.position);
538 Ok(Chunk::Parsed {
539 consumed: reader.position,
540 payload,
541 })
542 }
543 Err(e) => {
544 // If we're at EOF then there's no way we can recover from any
545 // error, so continue to propagate it.
546 if eof {
547 return Err(e);
548 }
549
550 // If our error doesn't look like it can be resolved with more
551 // data being pulled down, then propagate it, otherwise switch
552 // the error to "feed me please"
553 match e.inner.needed_hint {
554 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
555 None => Err(e),
556 }
557 }
558 }
559 }
560
561 fn parse_reader<'a>(
562 &mut self,
563 reader: &mut BinaryReader<'a>,
564 eof: bool,
565 ) -> Result<Payload<'a>> {
566 use Payload::*;
567
568 match self.state {
569 State::Header => {
570 let start = reader.original_position();
571 let header_version = reader.read_header_version()?;
572 self.encoding = match (header_version >> 16) as u16 {
573 KIND_MODULE => Encoding::Module,
574 KIND_COMPONENT => Encoding::Component,
575 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
576 };
577 let num = header_version as u16;
578 self.state = State::SectionStart;
579 Ok(Version {
580 num,
581 encoding: self.encoding,
582 range: start..reader.original_position(),
583 })
584 }
585 State::SectionStart => {
586 // If we're at eof and there are no bytes in our buffer, then
587 // that means we reached the end of the data since it's
588 // just a bunch of sections concatenated after the header.
589 if eof && reader.bytes_remaining() == 0 {
590 return Ok(Payload::End(reader.original_position()));
591 }
592
593 let id_pos = reader.position;
594 let id = reader.read_u8()?;
595 if id & 0x80 != 0 {
596 return Err(BinaryReaderError::new("malformed section id", id_pos));
597 }
598 let len_pos = reader.original_position();
599 let mut len = reader.read_var_u32()?;
600
601 // Test to make sure that this section actually fits within
602 // `Parser::max_size`. This doesn't matter for top-level modules
603 // but it is required for nested modules/components to correctly ensure
604 // that all sections live entirely within their section of the
605 // file.
606 let section_overflow = self
607 .max_size
608 .checked_sub(usize_to_u64(reader.position))
609 .and_then(|s| s.checked_sub(len.into()))
610 .is_none();
611 if section_overflow {
612 return Err(BinaryReaderError::new("section too large", len_pos));
613 }
614
615 match (self.encoding, id) {
616 // Sections for both modules and components.
617 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
618
619 // Module sections
620 (Encoding::Module, TYPE_SECTION) => {
621 section(reader, len, TypeSectionReader::new, TypeSection)
622 }
623 (Encoding::Module, IMPORT_SECTION) => {
624 section(reader, len, ImportSectionReader::new, ImportSection)
625 }
626 (Encoding::Module, FUNCTION_SECTION) => {
627 section(reader, len, FunctionSectionReader::new, FunctionSection)
628 }
629 (Encoding::Module, TABLE_SECTION) => {
630 section(reader, len, TableSectionReader::new, TableSection)
631 }
632 (Encoding::Module, MEMORY_SECTION) => {
633 section(reader, len, MemorySectionReader::new, MemorySection)
634 }
635 (Encoding::Module, GLOBAL_SECTION) => {
636 section(reader, len, GlobalSectionReader::new, GlobalSection)
637 }
638 (Encoding::Module, EXPORT_SECTION) => {
639 section(reader, len, ExportSectionReader::new, ExportSection)
640 }
641 (Encoding::Module, START_SECTION) => {
642 let (func, range) = single_item(reader, len, "start")?;
643 Ok(StartSection { func, range })
644 }
645 (Encoding::Module, ELEMENT_SECTION) => {
646 section(reader, len, ElementSectionReader::new, ElementSection)
647 }
648 (Encoding::Module, CODE_SECTION) => {
649 let start = reader.original_position();
650 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
651 let range = start..reader.original_position() + len as usize;
652 self.state = State::FunctionBody {
653 remaining: count,
654 len,
655 };
656 Ok(CodeSectionStart {
657 count,
658 range,
659 size: len,
660 })
661 }
662 (Encoding::Module, DATA_SECTION) => {
663 section(reader, len, DataSectionReader::new, DataSection)
664 }
665 (Encoding::Module, DATA_COUNT_SECTION) => {
666 let (count, range) = single_item(reader, len, "data count")?;
667 Ok(DataCountSection { count, range })
668 }
669 (Encoding::Module, TAG_SECTION) => {
670 section(reader, len, TagSectionReader::new, TagSection)
671 }
672
673 // Component sections
674 (Encoding::Component, COMPONENT_MODULE_SECTION)
675 | (Encoding::Component, COMPONENT_SECTION) => {
676 if len as usize > MAX_WASM_MODULE_SIZE {
677 bail!(
678 len_pos,
679 "{} section is too large",
680 if id == 1 { "module" } else { "component " }
681 );
682 }
683
684 let range =
685 reader.original_position()..reader.original_position() + len as usize;
686 self.max_size -= u64::from(len);
687 self.offset += u64::from(len);
688 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
689 parser.max_size = len.into();
690
691 Ok(match id {
692 1 => ModuleSection { parser, range },
693 4 => ComponentSection { parser, range },
694 _ => unreachable!(),
695 })
696 }
697 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
698 section(reader, len, InstanceSectionReader::new, InstanceSection)
699 }
700 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
701 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
702 }
703 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
704 reader,
705 len,
706 ComponentInstanceSectionReader::new,
707 ComponentInstanceSection,
708 ),
709 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
710 section(reader, len, SectionLimited::new, ComponentAliasSection)
711 }
712 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
713 reader,
714 len,
715 ComponentTypeSectionReader::new,
716 ComponentTypeSection,
717 ),
718 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
719 reader,
720 len,
721 ComponentCanonicalSectionReader::new,
722 ComponentCanonicalSection,
723 ),
724 (Encoding::Component, COMPONENT_START_SECTION) => {
725 let (start, range) = single_item(reader, len, "component start")?;
726 Ok(ComponentStartSection { start, range })
727 }
728 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
729 reader,
730 len,
731 ComponentImportSectionReader::new,
732 ComponentImportSection,
733 ),
734 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
735 reader,
736 len,
737 ComponentExportSectionReader::new,
738 ComponentExportSection,
739 ),
740 (_, id) => {
741 let offset = reader.original_position();
742 let contents = reader.read_bytes(len as usize)?;
743 let range = offset..offset + len as usize;
744 Ok(UnknownSection {
745 id,
746 contents,
747 range,
748 })
749 }
750 }
751 }
752
753 // Once we hit 0 remaining incrementally parsed items, with 0
754 // remaining bytes in each section, we're done and can switch back
755 // to parsing sections.
756 State::FunctionBody {
757 remaining: 0,
758 len: 0,
759 } => {
760 self.state = State::SectionStart;
761 self.parse_reader(reader, eof)
762 }
763
764 // ... otherwise trailing bytes with no remaining entries in these
765 // sections indicates an error.
766 State::FunctionBody { remaining: 0, len } => {
767 debug_assert!(len > 0);
768 let offset = reader.original_position();
769 Err(BinaryReaderError::new(
770 "trailing bytes at end of section",
771 offset,
772 ))
773 }
774
775 // Functions are relatively easy to parse when we know there's at
776 // least one remaining and at least one byte available to read
777 // things.
778 //
779 // We use the remaining length try to read a u32 size of the
780 // function, and using that size we require the entire function be
781 // resident in memory. This means that we're reading whole chunks of
782 // functions at a time.
783 //
784 // Limiting via `Parser::max_size` (nested parsing) happens above in
785 // `fn parse`, and limiting by our section size happens via
786 // `delimited`. Actual parsing of the function body is delegated to
787 // the caller to iterate over the `FunctionBody` structure.
788 State::FunctionBody { remaining, mut len } => {
789 let body = delimited(reader, &mut len, |r| {
790 let size = r.read_var_u32()?;
791 let offset = r.original_position();
792 Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
793 })?;
794 self.state = State::FunctionBody {
795 remaining: remaining - 1,
796 len,
797 };
798 Ok(CodeSectionEntry(body))
799 }
800 }
801 }
802
803 /// Convenience function that can be used to parse a module or component
804 /// that is entirely resident in memory.
805 ///
806 /// This function will parse the `data` provided as a WebAssembly module
807 /// or component.
808 ///
809 /// Note that when this function yields sections that provide parsers,
810 /// no further action is required for those sections as payloads from
811 /// those parsers will be automatically returned.
812 ///
813 // /// # Examples
814 // ///
815 // /// An example of reading a wasm file from a stream (`std::io::Read`) into
816 // /// a buffer and then parsing it.
817 // ///
818 // /// ```
819 // /// use crate::std::io::Read;
820 // /// use anyhow::Result;
821 // /// use tinywasm_wasmparser::{Parser, Chunk, Payload::*};
822 // ///
823 // /// fn parse(mut reader: impl Read) -> Result<()> {
824 // /// let mut buf = Vec::new();
825 // /// reader.read_to_end(&mut buf)?;
826 // /// let parser = Parser::new(0);
827 // ///
828 // /// for payload in parser.parse_all(&buf) {
829 // /// match payload? {
830 // /// // Sections for WebAssembly modules
831 // /// Version { .. } => { /* ... */ }
832 // /// TypeSection(_) => { /* ... */ }
833 // /// ImportSection(_) => { /* ... */ }
834 // /// FunctionSection(_) => { /* ... */ }
835 // /// TableSection(_) => { /* ... */ }
836 // /// MemorySection(_) => { /* ... */ }
837 // /// TagSection(_) => { /* ... */ }
838 // /// GlobalSection(_) => { /* ... */ }
839 // /// ExportSection(_) => { /* ... */ }
840 // /// StartSection { .. } => { /* ... */ }
841 // /// ElementSection(_) => { /* ... */ }
842 // /// DataCountSection { .. } => { /* ... */ }
843 // /// DataSection(_) => { /* ... */ }
844 // ///
845 // /// // Here we know how many functions we'll be receiving as
846 // /// // `CodeSectionEntry`, so we can prepare for that, and
847 // /// // afterwards we can parse and handle each function
848 // /// // individually.
849 // /// CodeSectionStart { .. } => { /* ... */ }
850 // /// CodeSectionEntry(body) => {
851 // /// // here we can iterate over `body` to parse the function
852 // /// // and its locals
853 // /// }
854 // ///
855 // /// // Sections for WebAssembly components
856 // /// ModuleSection { .. } => { /* ... */ }
857 // /// InstanceSection(_) => { /* ... */ }
858 // /// CoreTypeSection(_) => { /* ... */ }
859 // /// ComponentSection { .. } => { /* ... */ }
860 // /// ComponentInstanceSection(_) => { /* ... */ }
861 // /// ComponentAliasSection(_) => { /* ... */ }
862 // /// ComponentTypeSection(_) => { /* ... */ }
863 // /// ComponentCanonicalSection(_) => { /* ... */ }
864 // /// ComponentStartSection { .. } => { /* ... */ }
865 // /// ComponentImportSection(_) => { /* ... */ }
866 // /// ComponentExportSection(_) => { /* ... */ }
867 // ///
868 // /// CustomSection(_) => { /* ... */ }
869 // ///
870 // /// // most likely you'd return an error here
871 // /// UnknownSection { id, .. } => { /* ... */ }
872 // ///
873 // /// // Once we've reached the end of a parser we either resume
874 // /// // at the parent parser or the payload iterator is at its
875 // /// // end and we're done.
876 // /// End(_) => {}
877 // /// }
878 // /// }
879 // ///
880 // /// Ok(())
881 // /// }
882 // ///
883 // /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
884 // /// ```
885 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
886 let mut stack = Vec::new();
887 let mut cur = self;
888 let mut done = false;
889 iter::from_fn(move || {
890 if done {
891 return None;
892 }
893 let payload = match cur.parse(data, true) {
894 // Propagate all errors
895 Err(e) => {
896 done = true;
897 return Some(Err(e));
898 }
899
900 // This isn't possible because `eof` is always true.
901 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
902
903 Ok(Chunk::Parsed { payload, consumed }) => {
904 data = &data[consumed..];
905 payload
906 }
907 };
908
909 match &payload {
910 Payload::ModuleSection { parser, .. }
911 | Payload::ComponentSection { parser, .. } => {
912 stack.push(cur.clone());
913 cur = parser.clone();
914 }
915 Payload::End(_) => match stack.pop() {
916 Some(p) => cur = p,
917 None => done = true,
918 },
919
920 _ => {}
921 }
922
923 Some(Ok(payload))
924 })
925 }
926
927 /// Skip parsing the code section entirely.
928 ///
929 /// This function can be used to indicate, after receiving
930 /// `CodeSectionStart`, that the section will not be parsed.
931 ///
932 /// The caller will be responsible for skipping `size` bytes (found in the
933 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
934 /// after the `size` bytes have been skipped.
935 ///
936 /// # Panics
937 ///
938 /// This function will panic if the parser is not in a state where it's
939 /// parsing the code section.
940 ///
941 // /// # Examples
942 // ///
943 // /// ```
944 // /// use tinywasm_wasmparser::{Result, Parser, Chunk, Payload::*};
945 // /// use crate::std::ops::Range;
946 // ///
947 // /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
948 // /// let mut parser = Parser::new(0);
949 // /// loop {
950 // /// let payload = match parser.parse(wasm, true)? {
951 // /// Chunk::Parsed { consumed, payload } => {
952 // /// wasm = &wasm[consumed..];
953 // /// payload
954 // /// }
955 // /// // this state isn't possible with `eof = true`
956 // /// Chunk::NeedMoreData(_) => unreachable!(),
957 // /// };
958 // /// match payload {
959 // /// TypeSection(s) => print_range("type section", &s.range()),
960 // /// ImportSection(s) => print_range("import section", &s.range()),
961 // /// // .. other sections
962 // ///
963 // /// // Print the range of the code section we see, but don't
964 // /// // actually iterate over each individual function.
965 // /// CodeSectionStart { range, size, .. } => {
966 // /// print_range("code section", &range);
967 // /// parser.skip_section();
968 // /// wasm = &wasm[size as usize..];
969 // /// }
970 // /// End(_) => break,
971 // /// _ => {}
972 // /// }
973 // /// }
974 // /// Ok(())
975 // /// }
976 // ///
977 // /// fn print_range(section: &str, range: &Range<usize>) {
978 // /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
979 // /// }
980 // /// ```
981 pub fn skip_section(&mut self) {
982 let skip = match self.state {
983 State::FunctionBody { remaining: _, len } => len,
984 _ => panic!("wrong state to call `skip_section`"),
985 };
986 self.offset += u64::from(skip);
987 self.max_size -= u64::from(skip);
988 self.state = State::SectionStart;
989 }
990}
991
/// Converts a `usize` into a `u64`.
///
/// Panics if the value does not fit into a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    let widened: Result<u64, _> = a.try_into();
    widened.unwrap()
}
995
996/// Parses an entire section resident in memory into a `Payload`.
997///
998/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
999/// to construct the section to return.
1000fn section<'a, T>(
1001 reader: &mut BinaryReader<'a>,
1002 len: u32,
1003 ctor: fn(&'a [u8], usize) -> Result<T>,
1004 variant: fn(T) -> Payload<'a>,
1005) -> Result<Payload<'a>> {
1006 let offset = reader.original_position();
1007 let payload = reader.read_bytes(len as usize)?;
1008 // clear the hint for "need this many more bytes" here because we already
1009 // read all the bytes, so it's not possible to read more bytes if this
1010 // fails.
1011 let reader = ctor(payload, offset).map_err(clear_hint)?;
1012 Ok(variant(reader))
1013}
1014
1015/// Reads a section that is represented by a single uleb-encoded `u32`.
1016fn single_item<'a, T>(
1017 reader: &mut BinaryReader<'a>,
1018 len: u32,
1019 desc: &str,
1020) -> Result<(T, Range<usize>)>
1021where
1022 T: FromReader<'a>,
1023{
1024 let range = reader.original_position()..reader.original_position() + len as usize;
1025 let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
1026 // We can't recover from "unexpected eof" here because our entire section is
1027 // already resident in memory, so clear the hint for how many more bytes are
1028 // expected.
1029 let ret = content.read().map_err(clear_hint)?;
1030 if !content.eof() {
1031 bail!(
1032 content.original_position(),
1033 "unexpected content in the {desc} section",
1034 );
1035 }
1036 Ok((ret, range))
1037}
1038
1039/// Attempts to parse using `f`.
1040///
1041/// This will update `*len` with the number of bytes consumed, and it will cause
1042/// a failure to be returned instead of the number of bytes consumed exceeds
1043/// what `*len` currently is.
1044fn delimited<'a, T>(
1045 reader: &mut BinaryReader<'a>,
1046 len: &mut u32,
1047 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1048) -> Result<T> {
1049 let start = reader.position;
1050 let ret = f(reader)?;
1051 *len = match (reader.position - start)
1052 .try_into()
1053 .ok()
1054 .and_then(|i| len.checked_sub(i))
1055 {
1056 Some(i) => i,
1057 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1058 };
1059 Ok(ret)
1060}
1061
1062impl Default for Parser {
1063 fn default() -> Parser {
1064 Parser::new(0)
1065 }
1066}
1067
1068impl Payload<'_> {
1069 /// If this `Payload` represents a section in the original wasm module then
1070 /// the section's id and range within the original wasm binary are returned.
1071 ///
1072 /// Not all payloads refer to entire sections, such as the `Version` and
1073 /// `CodeSectionEntry` variants. These variants will return `None` from this
1074 /// function.
1075 ///
1076 /// Otherwise this function will return `Some` where the first element is
1077 /// the byte identifier for the section and the second element is the range
1078 /// of the contents of the section within the original wasm binary.
1079 ///
1080 /// The purpose of this method is to enable tools to easily iterate over
1081 /// entire sections if necessary and handle sections uniformly, for example
1082 /// dropping custom sections while preserving all other sections.
1083 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1084 use Payload::*;
1085
1086 match self {
1087 Version { .. } => None,
1088 TypeSection(s) => Some((TYPE_SECTION, s.range())),
1089 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1090 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1091 TableSection(s) => Some((TABLE_SECTION, s.range())),
1092 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1093 TagSection(s) => Some((TAG_SECTION, s.range())),
1094 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1095 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1096 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1097 DataSection(s) => Some((DATA_SECTION, s.range())),
1098 StartSection { range, .. } => Some((START_SECTION, range.clone())),
1099 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1100 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1101 CodeSectionEntry(_) => None,
1102
1103 ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1104 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1105 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1106 ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
1107 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1108 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1109 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1110 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1111 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1112 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1113 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1114
1115 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1116
1117 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1118
1119 End(_) => None,
1120 }
1121 }
1122}
1123
1124impl fmt::Debug for Payload<'_> {
1125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126 use Payload::*;
1127 match self {
1128 Version {
1129 num,
1130 encoding,
1131 range,
1132 } => f
1133 .debug_struct("Version")
1134 .field("num", num)
1135 .field("encoding", encoding)
1136 .field("range", range)
1137 .finish(),
1138
1139 // Module sections
1140 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1141 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1142 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1143 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1144 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1145 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1146 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1147 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1148 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1149 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1150 StartSection { func, range } => f
1151 .debug_struct("StartSection")
1152 .field("func", func)
1153 .field("range", range)
1154 .finish(),
1155 DataCountSection { count, range } => f
1156 .debug_struct("DataCountSection")
1157 .field("count", count)
1158 .field("range", range)
1159 .finish(),
1160 CodeSectionStart { count, range, size } => f
1161 .debug_struct("CodeSectionStart")
1162 .field("count", count)
1163 .field("range", range)
1164 .field("size", size)
1165 .finish(),
1166 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1167
1168 // Component sections
1169 ModuleSection { parser: _, range } => f
1170 .debug_struct("ModuleSection")
1171 .field("range", range)
1172 .finish(),
1173 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1174 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1175 ComponentSection { parser: _, range } => f
1176 .debug_struct("ComponentSection")
1177 .field("range", range)
1178 .finish(),
1179 ComponentInstanceSection(_) => f
1180 .debug_tuple("ComponentInstanceSection")
1181 .field(&"...")
1182 .finish(),
1183 ComponentAliasSection(_) => f
1184 .debug_tuple("ComponentAliasSection")
1185 .field(&"...")
1186 .finish(),
1187 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1188 ComponentCanonicalSection(_) => f
1189 .debug_tuple("ComponentCanonicalSection")
1190 .field(&"...")
1191 .finish(),
1192 ComponentStartSection { .. } => f
1193 .debug_tuple("ComponentStartSection")
1194 .field(&"...")
1195 .finish(),
1196 ComponentImportSection(_) => f
1197 .debug_tuple("ComponentImportSection")
1198 .field(&"...")
1199 .finish(),
1200 ComponentExportSection(_) => f
1201 .debug_tuple("ComponentExportSection")
1202 .field(&"...")
1203 .finish(),
1204
1205 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1206
1207 UnknownSection { id, range, .. } => f
1208 .debug_struct("UnknownSection")
1209 .field("id", id)
1210 .field("range", range)
1211 .finish(),
1212
1213 End(offset) => f.debug_tuple("End").field(offset).finish(),
1214 }
1215 }
1216}
1217
1218fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1219 err.inner.needed_hint = None;
1220 err
1221}
1222
1223#[cfg(test)]
1224mod tests {
1225 use super::*;
1226
1227 macro_rules! assert_matches {
1228 ($a:expr, $b:pat $(,)?) => {
1229 match $a {
1230 $b => {}
1231 a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1232 }
1233 };
1234 }
1235
    // Parsing the 8-byte header (`\0asm` magic followed by four more bytes)
    // from empty, partial and complete inputs.
    #[test]
    fn header() {
        // With `eof = true` an empty input can never become a header.
        assert!(Parser::default().parse(&[], true).is_err());
        // Without `eof` the parser asks for more data instead; the hint is 4
        // when none of the magic bytes are present yet...
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        // ... and 3 once the first magic byte has been supplied.
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        // Complete magic but nothing after it: 4 more bytes are requested.
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        // A full header is consumed in one go and reported as `Version`.
        assert_matches!(
            Parser::default().parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }
1259
1260 #[test]
1261 fn header_iter() {
1262 for _ in Parser::default().parse_all(&[]) {}
1263 for _ in Parser::default().parse_all(b"\0") {}
1264 for _ in Parser::default().parse_all(b"\0asm") {}
1265 for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1266 }
1267
1268 fn parser_after_header() -> Parser {
1269 let mut p = Parser::default();
1270 assert_matches!(
1271 p.parse(b"\0asm\x01\0\0\0", false),
1272 Ok(Chunk::Parsed {
1273 consumed: 8,
1274 payload: Payload::Version {
1275 num: WASM_MODULE_VERSION,
1276 encoding: Encoding::Module,
1277 ..
1278 },
1279 }),
1280 );
1281 p
1282 }
1283
1284 fn parser_after_component_header() -> Parser {
1285 let mut p = Parser::default();
1286 assert_matches!(
1287 p.parse(b"\0asm\x0d\0\x01\0", false),
1288 Ok(Chunk::Parsed {
1289 consumed: 8,
1290 payload: Payload::Version {
1291 num: WASM_COMPONENT_VERSION,
1292 encoding: Encoding::Component,
1293 ..
1294 },
1295 }),
1296 );
1297 p
1298 }
1299
    // Start section (id 8): each input is `[id, declared size, contents...]`.
    #[test]
    fn start_section() {
        // No bytes at all: at least the section id is needed.
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // With `eof = true`, a missing size or missing contents is an error.
        assert!(parser_after_header().parse(&[8], true).is_err());
        assert!(parser_after_header().parse(&[8, 1], true).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        // The same truncated inputs without `eof` only request more data; the
        // hint follows the declared section size.
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );
        // A one-byte section holding function index 1.
        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );
        // A two-byte section leaves a byte over after the index: rejected.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        // A zero-byte section has no room for the index: rejected.
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }
1331
    // After the header, an empty input with `eof = true` ends the module:
    // nothing is consumed and `End` reports offset 8, right after the 8
    // header bytes.
    #[test]
    fn end_works() {
        assert_matches!(
            parser_after_header().parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }
1342
    // Type section (id 1): each input is `[id, declared size, contents...]`.
    #[test]
    fn type_section() {
        // Missing size at `eof` and a zero-sized section are both rejected.
        assert!(parser_after_header().parse(&[1], true).is_err());
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        // NOTE(review): id 8 is the start section, not the type section; this
        // line looks carried over from `start_section` -- confirm intent.
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        // Truncated inputs without `eof` request more data.
        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Once the declared single content byte is present the section is
        // handed out; its contents are not inspected by this test.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes beyond the declared section size are left unconsumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }
1371
    // Custom section (id 0): `[id, declared size, name length, name bytes...,
    // data...]`. The id sits at offset 8 (right after the header), so the
    // section contents start at offset 10 in every expected `range` below.
    #[test]
    fn custom_section() {
        // Missing size at `eof`, a zero-sized section, and a name length of 1
        // inside a one-byte section are all rejected.
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        // Two bytes declared but only one present: one more byte is needed.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Empty name, no data.
        assert_matches!(
            parser_after_header().parse(&[0, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"",
                    range: Range { start: 10, end: 11 },
                }),
            }),
        );
        // One-byte name "a", no data.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "a",
                    data_offset: 12,
                    data: b"",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
        // Empty name, so the trailing `a` is the section's data instead.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 0, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"a",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
    }
1418
    // Despite its name this test exercises section id 10, which the parser
    // reports as `CodeSectionStart` followed by one `CodeSectionEntry` per
    // function body. Inputs are `[id, declared size, function count]`, with
    // the bodies fed to the parser in later calls.
    #[test]
    fn function_section() {
        // Truncated or zero-sized sections at `eof` are errors.
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        // Without `eof` the same truncation only requests more data.
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );
        // A section with zero functions: start payload, then end of module.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );
        // One function: only id, size and count are consumed up front; the
        // body (a single `0` size byte, i.e. an empty body) is parsed by the
        // following call.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        // The first body uses up the section's single remaining byte.
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        // A second body is still expected, so an empty input asks for more
        // data, but any byte supplied would exceed the declared section size.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        // All declared functions were read but one section byte is left over.
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }
1528
    // A component containing one nested core module: the parent parser hands
    // out a sub-parser for the module section, the sub-parser is limited to
    // the module's bytes, and the parent resumes afterwards.
    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        // A section id on its own is not enough; the size is still missing.
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = match p.parse(&[1, 8], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        // The reported end offset is 18: 8 bytes of component header, 2 bytes
        // of section header and the 8-byte module.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }
1580
    // A section inside a nested module must not extend past the byte budget
    // the enclosing component declared for that module.
    #[test]
    fn nested_section_too_big() {
        let mut p = parser_after_component_header();

        // A module that's 10 bytes in length
        let mut sub = match p.parse(&[1, 10], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
        // module.
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // We can't parse a section which declares it's bigger than the outer
        // module. This is a custom section, one byte big, with one content
        // byte: 3 bytes in total, while only 2 remain in the module. The
        // content byte therefore lives outside of the parent's module
        // section.
        assert_eq!(
            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
            "section too large",
        );
    }
1613}