// tinywasm_wasmparser/parser.rs

1use alloc::vec::Vec;
2
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::std::fmt;
5use crate::std::iter;
6use crate::std::ops::Range;
7use crate::CoreTypeSectionReader;
8use crate::{
9    limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
10    ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
11    ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
12    ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
13    GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
14    SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
15};
16
/// Version number expected in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
/// Version number expected in the header of a WebAssembly component.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// The 32-bit header word that follows the magic number carries the version in
// its low 16 bits and one of these "kind" discriminators in its high 16 bits.
/// Header kind identifying a core module.
const KIND_MODULE: u16 = 0x00;
/// Header kind identifying a component.
const KIND_COMPONENT: u16 = 0x01;
31
/// The binary encodings understood by the parser.
///
/// Which one applies is decided from the "kind" field of the header.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
40
41/// An incremental parser of a binary WebAssembly module or component.
42///
43/// This type is intended to be used to incrementally parse a WebAssembly module
44/// or component as bytes become available for the module. This can also be used
45/// to parse modules or components that are already entirely resident within memory.
46///
47/// This primary function for a parser is the [`Parser::parse`] function which
48/// will incrementally consume input. You can also use the [`Parser::parse_all`]
49/// function to parse a module or component that is entirely resident in memory.
50#[derive(Debug, Clone)]
51pub struct Parser {
52    state: State,
53    offset: u64,
54    max_size: u64,
55    encoding: Encoding,
56}
57
/// Position of a [`Parser`] within the overall structure of the binary.
#[derive(Debug, Clone)]
enum State {
    /// Nothing has been parsed yet; the header comes next.
    Header,
    /// The next bytes are the id and size of a section, or the end of input.
    SectionStart,
    /// The parser is inside a code section.
    FunctionBody {
        /// Initialised from the function count declared by the code section.
        remaining: u32,
        /// Initialised from the byte size of the section contents that follow
        /// the function count.
        len: u32,
    },
}
64
65/// A successful return payload from [`Parser::parse`].
66///
67/// On success one of two possible values can be returned, either that more data
68/// is needed to continue parsing or a chunk of the input was parsed, indicating
69/// how much of it was parsed.
70#[derive(Debug)]
71pub enum Chunk<'a> {
72    /// This can be returned at any time and indicates that more data is needed
73    /// to proceed with parsing. Zero bytes were consumed from the input to
74    /// [`Parser::parse`]. The `usize` value here is a hint as to how many more
75    /// bytes are needed to continue parsing.
76    NeedMoreData(u64),
77
78    /// A chunk was successfully parsed.
79    Parsed {
80        /// This many bytes of the `data` input to [`Parser::parse`] were
81        /// consumed to produce `payload`.
82        consumed: usize,
83        /// The value that we actually parsed.
84        payload: Payload<'a>,
85    },
86}
87
88/// Values that can be parsed from a WebAssembly module or component.
89///
90/// This enumeration is all possible chunks of pieces that can be parsed by a
91/// [`Parser`] from a binary WebAssembly module or component. Note that for many
92/// sections the entire section is parsed all at once, whereas other functions,
93/// like the code section, are parsed incrementally. This is a distinction where some
94/// sections, like the type section, are required to be fully resident in memory
95/// (fully downloaded) before proceeding. Other sections, like the code section,
96/// can be processed in a streaming fashion where each function is extracted
97/// individually so it can possibly be shipped to another thread while you wait
98/// for more functions to get downloaded.
99///
100/// Note that payloads, when returned, do not indicate that the module or component
101/// is valid. For example when you receive a `Payload::TypeSection` the type
102/// section itself has not yet actually been parsed. The reader returned will be
103/// able to parse it, but you'll have to actually iterate the reader to do the
104/// full parse. Each payload returned is intended to be a *window* into the
105/// original `data` passed to [`Parser::parse`] which can be further processed
106/// if necessary.
107pub enum Payload<'a> {
108    /// Indicates the header of a WebAssembly module or component.
109    Version {
110        /// The version number found in the header.
111        num: u16,
112        /// The encoding format being parsed.
113        encoding: Encoding,
114        /// The range of bytes that were parsed to consume the header of the
115        /// module or component. Note that this range is relative to the start
116        /// of the byte stream.
117        range: Range<usize>,
118    },
119
120    /// A module type section was received and the provided reader can be
121    /// used to parse the contents of the type section.
122    TypeSection(TypeSectionReader<'a>),
123    /// A module import section was received and the provided reader can be
124    /// used to parse the contents of the import section.
125    ImportSection(ImportSectionReader<'a>),
126    /// A module function section was received and the provided reader can be
127    /// used to parse the contents of the function section.
128    FunctionSection(FunctionSectionReader<'a>),
129    /// A module table section was received and the provided reader can be
130    /// used to parse the contents of the table section.
131    TableSection(TableSectionReader<'a>),
132    /// A module memory section was received and the provided reader can be
133    /// used to parse the contents of the memory section.
134    MemorySection(MemorySectionReader<'a>),
135    /// A module tag section was received, and the provided reader can be
136    /// used to parse the contents of the tag section.
137    TagSection(TagSectionReader<'a>),
138    /// A module global section was received and the provided reader can be
139    /// used to parse the contents of the global section.
140    GlobalSection(GlobalSectionReader<'a>),
141    /// A module export section was received, and the provided reader can be
142    /// used to parse the contents of the export section.
143    ExportSection(ExportSectionReader<'a>),
144    /// A module start section was received.
145    StartSection {
146        /// The start function index
147        func: u32,
148        /// The range of bytes that specify the `func` field, specified in
149        /// offsets relative to the start of the byte stream.
150        range: Range<usize>,
151    },
152    /// A module element section was received and the provided reader can be
153    /// used to parse the contents of the element section.
154    ElementSection(ElementSectionReader<'a>),
155    /// A module data count section was received.
156    DataCountSection {
157        /// The number of data segments.
158        count: u32,
159        /// The range of bytes that specify the `count` field, specified in
160        /// offsets relative to the start of the byte stream.
161        range: Range<usize>,
162    },
163    /// A module data section was received and the provided reader can be
164    /// used to parse the contents of the data section.
165    DataSection(DataSectionReader<'a>),
166    /// Indicator of the start of the code section of a WebAssembly module.
167    ///
168    /// This entry is returned whenever the code section starts. The `count`
169    /// field indicates how many entries are in this code section. After
170    /// receiving this start marker you're guaranteed that the next `count`
171    /// items will be either `CodeSectionEntry` or an error will be returned.
172    ///
173    /// This, unlike other sections, is intended to be used for streaming the
174    /// contents of the code section. The code section is not required to be
175    /// fully resident in memory when we parse it. Instead a [`Parser`] is
176    /// capable of parsing piece-by-piece of a code section.
177    CodeSectionStart {
178        /// The number of functions in this section.
179        count: u32,
180        /// The range of bytes that represent this section, specified in
181        /// offsets relative to the start of the byte stream.
182        range: Range<usize>,
183        /// The size, in bytes, of the remaining contents of this section.
184        ///
185        /// This can be used in combination with [`Parser::skip_section`]
186        /// where the caller will know how many bytes to skip before feeding
187        /// bytes into `Parser` again.
188        size: u32,
189    },
190    /// An entry of the code section, a function, was parsed from a WebAssembly
191    /// module.
192    ///
193    /// This entry indicates that a function was successfully received from the
194    /// code section, and the payload here is the window into the original input
195    /// where the function resides. Note that the function itself has not been
196    /// parsed, it's only been outlined. You'll need to process the
197    /// `FunctionBody` provided to test whether it parses and/or is valid.
198    CodeSectionEntry(FunctionBody<'a>),
199
200    /// A core module section was received and the provided parser can be
201    /// used to parse the nested module.
202    ///
203    /// This variant is special in that it returns a sub-`Parser`. Upon
204    /// receiving a `ModuleSection` it is expected that the returned
205    /// `Parser` will be used instead of the parent `Parser` until the parse has
206    /// finished. You'll need to feed data into the `Parser` returned until it
207    /// returns `Payload::End`. After that you'll switch back to the parent
208    /// parser to resume parsing the rest of the current component.
209    ///
210    /// Note that binaries will not be parsed correctly if you feed the data for
211    /// a nested module into the parent [`Parser`].
212    ModuleSection {
213        /// The parser for the nested module.
214        parser: Parser,
215        /// The range of bytes that represent the nested module in the
216        /// original byte stream.
217        range: Range<usize>,
218    },
219    /// A core instance section was received and the provided parser can be
220    /// used to parse the contents of the core instance section.
221    ///
222    /// Currently this section is only parsed in a component.
223    InstanceSection(InstanceSectionReader<'a>),
224    /// A core type section was received and the provided parser can be
225    /// used to parse the contents of the core type section.
226    ///
227    /// Currently this section is only parsed in a component.
228    CoreTypeSection(CoreTypeSectionReader<'a>),
229    /// A component section from a WebAssembly component was received and the
230    /// provided parser can be used to parse the nested component.
231    ///
232    /// This variant is special in that it returns a sub-`Parser`. Upon
233    /// receiving a `ComponentSection` it is expected that the returned
234    /// `Parser` will be used instead of the parent `Parser` until the parse has
235    /// finished. You'll need to feed data into the `Parser` returned until it
236    /// returns `Payload::End`. After that you'll switch back to the parent
237    /// parser to resume parsing the rest of the current component.
238    ///
239    /// Note that binaries will not be parsed correctly if you feed the data for
240    /// a nested component into the parent [`Parser`].
241    ComponentSection {
242        /// The parser for the nested component.
243        parser: Parser,
244        /// The range of bytes that represent the nested component in the
245        /// original byte stream.
246        range: Range<usize>,
247    },
248    /// A component instance section was received and the provided reader can be
249    /// used to parse the contents of the component instance section.
250    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
251    /// A component alias section was received and the provided reader can be
252    /// used to parse the contents of the component alias section.
253    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
254    /// A component type section was received and the provided reader can be
255    /// used to parse the contents of the component type section.
256    ComponentTypeSection(ComponentTypeSectionReader<'a>),
257    /// A component canonical section was received and the provided reader can be
258    /// used to parse the contents of the component canonical section.
259    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
260    /// A component start section was received.
261    ComponentStartSection {
262        /// The start function description.
263        start: ComponentStartFunction,
264        /// The range of bytes that specify the `start` field.
265        range: Range<usize>,
266    },
267    /// A component import section was received and the provided reader can be
268    /// used to parse the contents of the component import section.
269    ComponentImportSection(ComponentImportSectionReader<'a>),
270    /// A component export section was received, and the provided reader can be
271    /// used to parse the contents of the component export section.
272    ComponentExportSection(ComponentExportSectionReader<'a>),
273
274    /// A module or component custom section was received.
275    CustomSection(CustomSectionReader<'a>),
276
277    /// An unknown section was found.
278    ///
279    /// This variant is returned for all unknown sections encountered. This
280    /// likely wants to be interpreted as an error by consumers of the parser,
281    /// but this can also be used to parse sections currently unsupported by
282    /// the parser.
283    UnknownSection {
284        /// The 8-bit identifier for this section.
285        id: u8,
286        /// The contents of this section.
287        contents: &'a [u8],
288        /// The range of bytes, relative to the start of the original data
289        /// stream, that the contents of this section reside in.
290        range: Range<usize>,
291    },
292
293    /// The end of the WebAssembly module or component was reached.
294    ///
295    /// The value is the offset in the input byte stream where the end
296    /// was reached.
297    End(usize),
298}
299
// Section ids recognised when the binary is a core module. Id 0 (custom) is
// accepted for components as well.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids recognised when the binary is a component. The numeric values
// overlap with the module ids above, so an id is only meaningful together with
// the encoding announced by the header.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
326
327impl Parser {
328    /// Creates a new parser.
329    ///
330    /// Reports errors and ranges relative to `offset` provided, where `offset`
331    /// is some logical offset within the input stream that we're parsing.
332    pub fn new(offset: u64) -> Parser {
333        Parser {
334            state: State::Header,
335            offset,
336            max_size: u64::MAX,
337            // Assume the encoding is a module until we know otherwise
338            encoding: Encoding::Module,
339        }
340    }
341
342    /// Tests whether `bytes` looks like a core WebAssembly module.
343    ///
344    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
345    /// starts with the standard core WebAssembly header.
346    pub fn is_core_wasm(bytes: &[u8]) -> bool {
347        const HEADER: [u8; 8] = [
348            WASM_MAGIC_NUMBER[0],
349            WASM_MAGIC_NUMBER[1],
350            WASM_MAGIC_NUMBER[2],
351            WASM_MAGIC_NUMBER[3],
352            WASM_MODULE_VERSION.to_le_bytes()[0],
353            WASM_MODULE_VERSION.to_le_bytes()[1],
354            KIND_MODULE.to_le_bytes()[0],
355            KIND_MODULE.to_le_bytes()[1],
356        ];
357        bytes.starts_with(&HEADER)
358    }
359
360    /// Tests whether `bytes` looks like a WebAssembly component.
361    ///
362    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
363    /// starts with the standard WebAssembly component header.
364    pub fn is_component(bytes: &[u8]) -> bool {
365        const HEADER: [u8; 8] = [
366            WASM_MAGIC_NUMBER[0],
367            WASM_MAGIC_NUMBER[1],
368            WASM_MAGIC_NUMBER[2],
369            WASM_MAGIC_NUMBER[3],
370            WASM_COMPONENT_VERSION.to_le_bytes()[0],
371            WASM_COMPONENT_VERSION.to_le_bytes()[1],
372            KIND_COMPONENT.to_le_bytes()[0],
373            KIND_COMPONENT.to_le_bytes()[1],
374        ];
375        bytes.starts_with(&HEADER)
376    }
377
378    /// Attempts to parse a chunk of data.
379    ///
380    /// This method will attempt to parse the next incremental portion of a
381    /// WebAssembly binary. Data available for the module or component is
382    /// provided as `data`, and the data can be incomplete if more data has yet
383    /// to arrive. The `eof` flag indicates whether more data will ever be received.
384    ///
385    /// There are two ways parsing can succeed with this method:
386    ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
    ///   in `data` to parse a payload. The caller needs to wait for more data to
    ///   be available in this situation before calling this method again. It is
    ///   guaranteed that this is only returned if `eof` is `false`.
    ///
    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
    ///   successfully parsed. The payload is available in this variant of what
    ///   was parsed, and this also indicates how many bytes of `data` were
    ///   consumed. It's expected that the caller will not provide these bytes
    ///   back to the [`Parser`] again.
397    ///
398    /// Note that all `Chunk` return values are connected, with a lifetime, to
399    /// the input buffer. Each parsed chunk borrows the input buffer and is a
400    /// view into it for successfully parsed chunks.
401    ///
402    /// It is expected that you'll call this method until `Payload::End` is
403    /// reached, at which point you're guaranteed that the parse has completed.
404    /// Note that complete parsing, for the top-level module or component,
405    /// implies that `data` is empty and `eof` is `true`.
406    ///
407    /// # Errors
408    ///
409    /// Parse errors are returned as an `Err`. Errors can happen when the
410    /// structure of the data is unexpected or if sections are too large for
411    /// example. Note that errors are not returned for malformed *contents* of
412    /// sections here. Sections are generally not individually parsed and each
413    /// returned [`Payload`] needs to be iterated over further to detect all
414    /// errors.
415    ///
416    // /// # Examples
417    // ///
418    // /// An example of reading a wasm file from a stream (`std::io::Read`) and
419    // /// incrementally parsing it.
420    // ///
421    // /// ```
422    // /// use crate::std::io::Read;
423    // /// use anyhow::Result;
424    // /// use tinywasm_wasmparser::{Parser, Chunk, Payload::*};
425    // ///
426    // /// fn parse(mut reader: impl Read) -> Result<()> {
427    // ///     let mut buf = Vec::new();
428    // ///     let mut cur = Parser::new(0);
429    // ///     let mut eof = false;
430    // ///     let mut stack = Vec::new();
431    // ///
432    // ///     loop {
433    // ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
434    // ///             Chunk::NeedMoreData(hint) => {
435    // ///                 assert!(!eof); // otherwise an error would be returned
436    // ///
437    // ///                 // Use the hint to preallocate more space, then read
438    // ///                 // some more data into our buffer.
439    // ///                 //
440    // ///                 // Note that the buffer management here is not ideal,
441    // ///                 // but it's compact enough to fit in an example!
442    // ///                 let len = buf.len();
443    // ///                 buf.extend((0..hint).map(|_| 0u8));
444    // ///                 let n = reader.read(&mut buf[len..])?;
445    // ///                 buf.truncate(len + n);
446    // ///                 eof = n == 0;
447    // ///                 continue;
448    // ///             }
449    // ///
450    // ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
451    // ///         };
452    // ///
453    // ///         match payload {
454    // ///             // Sections for WebAssembly modules
455    // ///             Version { .. } => { /* ... */ }
456    // ///             TypeSection(_) => { /* ... */ }
457    // ///             ImportSection(_) => { /* ... */ }
458    // ///             FunctionSection(_) => { /* ... */ }
459    // ///             TableSection(_) => { /* ... */ }
460    // ///             MemorySection(_) => { /* ... */ }
461    // ///             TagSection(_) => { /* ... */ }
462    // ///             GlobalSection(_) => { /* ... */ }
463    // ///             ExportSection(_) => { /* ... */ }
464    // ///             StartSection { .. } => { /* ... */ }
465    // ///             ElementSection(_) => { /* ... */ }
466    // ///             DataCountSection { .. } => { /* ... */ }
467    // ///             DataSection(_) => { /* ... */ }
468    // ///
469    // ///             // Here we know how many functions we'll be receiving as
470    // ///             // `CodeSectionEntry`, so we can prepare for that, and
471    // ///             // afterwards we can parse and handle each function
472    // ///             // individually.
473    // ///             CodeSectionStart { .. } => { /* ... */ }
474    // ///             CodeSectionEntry(body) => {
475    // ///                 // here we can iterate over `body` to parse the function
476    // ///                 // and its locals
477    // ///             }
478    // ///
479    // ///             // Sections for WebAssembly components
480    // ///             InstanceSection(_) => { /* ... */ }
481    // ///             CoreTypeSection(_) => { /* ... */ }
482    // ///             ComponentInstanceSection(_) => { /* ... */ }
483    // ///             ComponentAliasSection(_) => { /* ... */ }
484    // ///             ComponentTypeSection(_) => { /* ... */ }
485    // ///             ComponentCanonicalSection(_) => { /* ... */ }
486    // ///             ComponentStartSection { .. } => { /* ... */ }
487    // ///             ComponentImportSection(_) => { /* ... */ }
488    // ///             ComponentExportSection(_) => { /* ... */ }
489    // ///
490    // ///             ModuleSection { parser, .. }
491    // ///             | ComponentSection { parser, .. } => {
492    // ///                 stack.push(cur.clone());
493    // ///                 cur = parser.clone();
494    // ///             }
495    // ///
496    // ///             CustomSection(_) => { /* ... */ }
497    // ///
498    // ///             // most likely you'd return an error here
499    // ///             UnknownSection { id, .. } => { /* ... */ }
500    // ///
501    // ///             // Once we've reached the end of a parser we either resume
502    // ///             // at the parent parser or we break out of the loop because
503    // ///             // we're done.
504    // ///             End(_) => {
505    // ///                 if let Some(parent_parser) = stack.pop() {
506    // ///                     cur = parent_parser;
507    // ///                 } else {
508    // ///                     break;
509    // ///                 }
510    // ///             }
511    // ///         }
512    // ///
513    // ///         // once we're done processing the payload we can forget the
514    // ///         // original.
515    // ///         buf.drain(..consumed);
516    // ///     }
517    // ///
518    // ///     Ok(())
519    // /// }
520    // ///
521    // /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
522    // /// ```
523    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
524        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
525            (&data[..(self.max_size as usize)], true)
526        } else {
527            (data, eof)
528        };
529        // TODO: thread through `offset: u64` to `BinaryReader`, remove
530        // the cast here.
531        let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
532        match self.parse_reader(&mut reader, eof) {
533            Ok(payload) => {
534                // Be sure to update our offset with how far we got in the
535                // reader
536                self.offset += usize_to_u64(reader.position);
537                self.max_size -= usize_to_u64(reader.position);
538                Ok(Chunk::Parsed {
539                    consumed: reader.position,
540                    payload,
541                })
542            }
543            Err(e) => {
544                // If we're at EOF then there's no way we can recover from any
545                // error, so continue to propagate it.
546                if eof {
547                    return Err(e);
548                }
549
550                // If our error doesn't look like it can be resolved with more
551                // data being pulled down, then propagate it, otherwise switch
552                // the error to "feed me please"
553                match e.inner.needed_hint {
554                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
555                    None => Err(e),
556                }
557            }
558        }
559    }
560
561    fn parse_reader<'a>(
562        &mut self,
563        reader: &mut BinaryReader<'a>,
564        eof: bool,
565    ) -> Result<Payload<'a>> {
566        use Payload::*;
567
568        match self.state {
569            State::Header => {
570                let start = reader.original_position();
571                let header_version = reader.read_header_version()?;
572                self.encoding = match (header_version >> 16) as u16 {
573                    KIND_MODULE => Encoding::Module,
574                    KIND_COMPONENT => Encoding::Component,
575                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
576                };
577                let num = header_version as u16;
578                self.state = State::SectionStart;
579                Ok(Version {
580                    num,
581                    encoding: self.encoding,
582                    range: start..reader.original_position(),
583                })
584            }
585            State::SectionStart => {
586                // If we're at eof and there are no bytes in our buffer, then
587                // that means we reached the end of the data since it's
588                // just a bunch of sections concatenated after the header.
589                if eof && reader.bytes_remaining() == 0 {
590                    return Ok(Payload::End(reader.original_position()));
591                }
592
593                let id_pos = reader.position;
594                let id = reader.read_u8()?;
595                if id & 0x80 != 0 {
596                    return Err(BinaryReaderError::new("malformed section id", id_pos));
597                }
598                let len_pos = reader.original_position();
599                let mut len = reader.read_var_u32()?;
600
601                // Test to make sure that this section actually fits within
602                // `Parser::max_size`. This doesn't matter for top-level modules
603                // but it is required for nested modules/components to correctly ensure
604                // that all sections live entirely within their section of the
605                // file.
606                let section_overflow = self
607                    .max_size
608                    .checked_sub(usize_to_u64(reader.position))
609                    .and_then(|s| s.checked_sub(len.into()))
610                    .is_none();
611                if section_overflow {
612                    return Err(BinaryReaderError::new("section too large", len_pos));
613                }
614
615                match (self.encoding, id) {
616                    // Sections for both modules and components.
617                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
618
619                    // Module sections
620                    (Encoding::Module, TYPE_SECTION) => {
621                        section(reader, len, TypeSectionReader::new, TypeSection)
622                    }
623                    (Encoding::Module, IMPORT_SECTION) => {
624                        section(reader, len, ImportSectionReader::new, ImportSection)
625                    }
626                    (Encoding::Module, FUNCTION_SECTION) => {
627                        section(reader, len, FunctionSectionReader::new, FunctionSection)
628                    }
629                    (Encoding::Module, TABLE_SECTION) => {
630                        section(reader, len, TableSectionReader::new, TableSection)
631                    }
632                    (Encoding::Module, MEMORY_SECTION) => {
633                        section(reader, len, MemorySectionReader::new, MemorySection)
634                    }
635                    (Encoding::Module, GLOBAL_SECTION) => {
636                        section(reader, len, GlobalSectionReader::new, GlobalSection)
637                    }
638                    (Encoding::Module, EXPORT_SECTION) => {
639                        section(reader, len, ExportSectionReader::new, ExportSection)
640                    }
641                    (Encoding::Module, START_SECTION) => {
642                        let (func, range) = single_item(reader, len, "start")?;
643                        Ok(StartSection { func, range })
644                    }
645                    (Encoding::Module, ELEMENT_SECTION) => {
646                        section(reader, len, ElementSectionReader::new, ElementSection)
647                    }
648                    (Encoding::Module, CODE_SECTION) => {
649                        let start = reader.original_position();
650                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
651                        let range = start..reader.original_position() + len as usize;
652                        self.state = State::FunctionBody {
653                            remaining: count,
654                            len,
655                        };
656                        Ok(CodeSectionStart {
657                            count,
658                            range,
659                            size: len,
660                        })
661                    }
662                    (Encoding::Module, DATA_SECTION) => {
663                        section(reader, len, DataSectionReader::new, DataSection)
664                    }
665                    (Encoding::Module, DATA_COUNT_SECTION) => {
666                        let (count, range) = single_item(reader, len, "data count")?;
667                        Ok(DataCountSection { count, range })
668                    }
669                    (Encoding::Module, TAG_SECTION) => {
670                        section(reader, len, TagSectionReader::new, TagSection)
671                    }
672
673                    // Component sections
674                    (Encoding::Component, COMPONENT_MODULE_SECTION)
675                    | (Encoding::Component, COMPONENT_SECTION) => {
676                        if len as usize > MAX_WASM_MODULE_SIZE {
677                            bail!(
678                                len_pos,
679                                "{} section is too large",
680                                if id == 1 { "module" } else { "component " }
681                            );
682                        }
683
684                        let range =
685                            reader.original_position()..reader.original_position() + len as usize;
686                        self.max_size -= u64::from(len);
687                        self.offset += u64::from(len);
688                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
689                        parser.max_size = len.into();
690
691                        Ok(match id {
692                            1 => ModuleSection { parser, range },
693                            4 => ComponentSection { parser, range },
694                            _ => unreachable!(),
695                        })
696                    }
697                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
698                        section(reader, len, InstanceSectionReader::new, InstanceSection)
699                    }
700                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
701                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
702                    }
703                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
704                        reader,
705                        len,
706                        ComponentInstanceSectionReader::new,
707                        ComponentInstanceSection,
708                    ),
709                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
710                        section(reader, len, SectionLimited::new, ComponentAliasSection)
711                    }
712                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
713                        reader,
714                        len,
715                        ComponentTypeSectionReader::new,
716                        ComponentTypeSection,
717                    ),
718                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
719                        reader,
720                        len,
721                        ComponentCanonicalSectionReader::new,
722                        ComponentCanonicalSection,
723                    ),
724                    (Encoding::Component, COMPONENT_START_SECTION) => {
725                        let (start, range) = single_item(reader, len, "component start")?;
726                        Ok(ComponentStartSection { start, range })
727                    }
728                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
729                        reader,
730                        len,
731                        ComponentImportSectionReader::new,
732                        ComponentImportSection,
733                    ),
734                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
735                        reader,
736                        len,
737                        ComponentExportSectionReader::new,
738                        ComponentExportSection,
739                    ),
740                    (_, id) => {
741                        let offset = reader.original_position();
742                        let contents = reader.read_bytes(len as usize)?;
743                        let range = offset..offset + len as usize;
744                        Ok(UnknownSection {
745                            id,
746                            contents,
747                            range,
748                        })
749                    }
750                }
751            }
752
753            // Once we hit 0 remaining incrementally parsed items, with 0
754            // remaining bytes in each section, we're done and can switch back
755            // to parsing sections.
756            State::FunctionBody {
757                remaining: 0,
758                len: 0,
759            } => {
760                self.state = State::SectionStart;
761                self.parse_reader(reader, eof)
762            }
763
764            // ... otherwise trailing bytes with no remaining entries in these
765            // sections indicates an error.
766            State::FunctionBody { remaining: 0, len } => {
767                debug_assert!(len > 0);
768                let offset = reader.original_position();
769                Err(BinaryReaderError::new(
770                    "trailing bytes at end of section",
771                    offset,
772                ))
773            }
774
775            // Functions are relatively easy to parse when we know there's at
776            // least one remaining and at least one byte available to read
777            // things.
778            //
            // We use the remaining length to try to read a u32 size of the
780            // function, and using that size we require the entire function be
781            // resident in memory. This means that we're reading whole chunks of
782            // functions at a time.
783            //
784            // Limiting via `Parser::max_size` (nested parsing) happens above in
785            // `fn parse`, and limiting by our section size happens via
786            // `delimited`. Actual parsing of the function body is delegated to
787            // the caller to iterate over the `FunctionBody` structure.
788            State::FunctionBody { remaining, mut len } => {
789                let body = delimited(reader, &mut len, |r| {
790                    let size = r.read_var_u32()?;
791                    let offset = r.original_position();
792                    Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
793                })?;
794                self.state = State::FunctionBody {
795                    remaining: remaining - 1,
796                    len,
797                };
798                Ok(CodeSectionEntry(body))
799            }
800        }
801    }
802
803    /// Convenience function that can be used to parse a module or component
804    /// that is entirely resident in memory.
805    ///
806    /// This function will parse the `data` provided as a WebAssembly module
807    /// or component.
808    ///
809    /// Note that when this function yields sections that provide parsers,
810    /// no further action is required for those sections as payloads from
811    /// those parsers will be automatically returned.
812    ///
813    // /// # Examples
814    // ///
815    // /// An example of reading a wasm file from a stream (`std::io::Read`) into
816    // /// a buffer and then parsing it.
817    // ///
818    // /// ```
819    // /// use crate::std::io::Read;
820    // /// use anyhow::Result;
821    // /// use tinywasm_wasmparser::{Parser, Chunk, Payload::*};
822    // ///
823    // /// fn parse(mut reader: impl Read) -> Result<()> {
824    // ///     let mut buf = Vec::new();
825    // ///     reader.read_to_end(&mut buf)?;
826    // ///     let parser = Parser::new(0);
827    // ///
828    // ///     for payload in parser.parse_all(&buf) {
829    // ///         match payload? {
830    // ///             // Sections for WebAssembly modules
831    // ///             Version { .. } => { /* ... */ }
832    // ///             TypeSection(_) => { /* ... */ }
833    // ///             ImportSection(_) => { /* ... */ }
834    // ///             FunctionSection(_) => { /* ... */ }
835    // ///             TableSection(_) => { /* ... */ }
836    // ///             MemorySection(_) => { /* ... */ }
837    // ///             TagSection(_) => { /* ... */ }
838    // ///             GlobalSection(_) => { /* ... */ }
839    // ///             ExportSection(_) => { /* ... */ }
840    // ///             StartSection { .. } => { /* ... */ }
841    // ///             ElementSection(_) => { /* ... */ }
842    // ///             DataCountSection { .. } => { /* ... */ }
843    // ///             DataSection(_) => { /* ... */ }
844    // ///
845    // ///             // Here we know how many functions we'll be receiving as
846    // ///             // `CodeSectionEntry`, so we can prepare for that, and
847    // ///             // afterwards we can parse and handle each function
848    // ///             // individually.
849    // ///             CodeSectionStart { .. } => { /* ... */ }
850    // ///             CodeSectionEntry(body) => {
851    // ///                 // here we can iterate over `body` to parse the function
852    // ///                 // and its locals
853    // ///             }
854    // ///
855    // ///             // Sections for WebAssembly components
856    // ///             ModuleSection { .. } => { /* ... */ }
857    // ///             InstanceSection(_) => { /* ... */ }
858    // ///             CoreTypeSection(_) => { /* ... */ }
859    // ///             ComponentSection { .. } => { /* ... */ }
860    // ///             ComponentInstanceSection(_) => { /* ... */ }
861    // ///             ComponentAliasSection(_) => { /* ... */ }
862    // ///             ComponentTypeSection(_) => { /* ... */ }
863    // ///             ComponentCanonicalSection(_) => { /* ... */ }
864    // ///             ComponentStartSection { .. } => { /* ... */ }
865    // ///             ComponentImportSection(_) => { /* ... */ }
866    // ///             ComponentExportSection(_) => { /* ... */ }
867    // ///
868    // ///             CustomSection(_) => { /* ... */ }
869    // ///
870    // ///             // most likely you'd return an error here
871    // ///             UnknownSection { id, .. } => { /* ... */ }
872    // ///
873    // ///             // Once we've reached the end of a parser we either resume
874    // ///             // at the parent parser or the payload iterator is at its
875    // ///             // end and we're done.
876    // ///             End(_) => {}
877    // ///         }
878    // ///     }
879    // ///
880    // ///     Ok(())
881    // /// }
882    // ///
883    // /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
884    // /// ```
885    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
886        let mut stack = Vec::new();
887        let mut cur = self;
888        let mut done = false;
889        iter::from_fn(move || {
890            if done {
891                return None;
892            }
893            let payload = match cur.parse(data, true) {
894                // Propagate all errors
895                Err(e) => {
896                    done = true;
897                    return Some(Err(e));
898                }
899
900                // This isn't possible because `eof` is always true.
901                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
902
903                Ok(Chunk::Parsed { payload, consumed }) => {
904                    data = &data[consumed..];
905                    payload
906                }
907            };
908
909            match &payload {
910                Payload::ModuleSection { parser, .. }
911                | Payload::ComponentSection { parser, .. } => {
912                    stack.push(cur.clone());
913                    cur = parser.clone();
914                }
915                Payload::End(_) => match stack.pop() {
916                    Some(p) => cur = p,
917                    None => done = true,
918                },
919
920                _ => {}
921            }
922
923            Some(Ok(payload))
924        })
925    }
926
927    /// Skip parsing the code section entirely.
928    ///
929    /// This function can be used to indicate, after receiving
930    /// `CodeSectionStart`, that the section will not be parsed.
931    ///
932    /// The caller will be responsible for skipping `size` bytes (found in the
933    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
934    /// after the `size` bytes have been skipped.
935    ///
936    /// # Panics
937    ///
938    /// This function will panic if the parser is not in a state where it's
939    /// parsing the code section.
940    ///
941    // /// # Examples
942    // ///
943    // /// ```
944    // /// use tinywasm_wasmparser::{Result, Parser, Chunk, Payload::*};
945    // /// use crate::std::ops::Range;
946    // ///
947    // /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
948    // ///     let mut parser = Parser::new(0);
949    // ///     loop {
950    // ///         let payload = match parser.parse(wasm, true)? {
951    // ///             Chunk::Parsed { consumed, payload } => {
952    // ///                 wasm = &wasm[consumed..];
953    // ///                 payload
954    // ///             }
955    // ///             // this state isn't possible with `eof = true`
956    // ///             Chunk::NeedMoreData(_) => unreachable!(),
957    // ///         };
958    // ///         match payload {
959    // ///             TypeSection(s) => print_range("type section", &s.range()),
960    // ///             ImportSection(s) => print_range("import section", &s.range()),
961    // ///             // .. other sections
962    // ///
963    // ///             // Print the range of the code section we see, but don't
964    // ///             // actually iterate over each individual function.
965    // ///             CodeSectionStart { range, size, .. } => {
966    // ///                 print_range("code section", &range);
967    // ///                 parser.skip_section();
968    // ///                 wasm = &wasm[size as usize..];
969    // ///             }
970    // ///             End(_) => break,
971    // ///             _ => {}
972    // ///         }
973    // ///     }
974    // ///     Ok(())
975    // /// }
976    // ///
977    // /// fn print_range(section: &str, range: &Range<usize>) {
978    // ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
979    // /// }
980    // /// ```
981    pub fn skip_section(&mut self) {
982        let skip = match self.state {
983            State::FunctionBody { remaining: _, len } => len,
984            _ => panic!("wrong state to call `skip_section`"),
985        };
986        self.offset += u64::from(skip);
987        self.max_size -= u64::from(skip);
988        self.state = State::SectionStart;
989    }
990}
991
/// Converts a `usize` into a `u64`.
///
/// # Panics
///
/// Panics if `a` cannot be represented as a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
995
996/// Parses an entire section resident in memory into a `Payload`.
997///
998/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
999/// to construct the section to return.
1000fn section<'a, T>(
1001    reader: &mut BinaryReader<'a>,
1002    len: u32,
1003    ctor: fn(&'a [u8], usize) -> Result<T>,
1004    variant: fn(T) -> Payload<'a>,
1005) -> Result<Payload<'a>> {
1006    let offset = reader.original_position();
1007    let payload = reader.read_bytes(len as usize)?;
1008    // clear the hint for "need this many more bytes" here because we already
1009    // read all the bytes, so it's not possible to read more bytes if this
1010    // fails.
1011    let reader = ctor(payload, offset).map_err(clear_hint)?;
1012    Ok(variant(reader))
1013}
1014
1015/// Reads a section that is represented by a single uleb-encoded `u32`.
1016fn single_item<'a, T>(
1017    reader: &mut BinaryReader<'a>,
1018    len: u32,
1019    desc: &str,
1020) -> Result<(T, Range<usize>)>
1021where
1022    T: FromReader<'a>,
1023{
1024    let range = reader.original_position()..reader.original_position() + len as usize;
1025    let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
1026    // We can't recover from "unexpected eof" here because our entire section is
1027    // already resident in memory, so clear the hint for how many more bytes are
1028    // expected.
1029    let ret = content.read().map_err(clear_hint)?;
1030    if !content.eof() {
1031        bail!(
1032            content.original_position(),
1033            "unexpected content in the {desc} section",
1034        );
1035    }
1036    Ok((ret, range))
1037}
1038
1039/// Attempts to parse using `f`.
1040///
1041/// This will update `*len` with the number of bytes consumed, and it will cause
1042/// a failure to be returned instead of the number of bytes consumed exceeds
1043/// what `*len` currently is.
1044fn delimited<'a, T>(
1045    reader: &mut BinaryReader<'a>,
1046    len: &mut u32,
1047    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1048) -> Result<T> {
1049    let start = reader.position;
1050    let ret = f(reader)?;
1051    *len = match (reader.position - start)
1052        .try_into()
1053        .ok()
1054        .and_then(|i| len.checked_sub(i))
1055    {
1056        Some(i) => i,
1057        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1058    };
1059    Ok(ret)
1060}
1061
1062impl Default for Parser {
1063    fn default() -> Parser {
1064        Parser::new(0)
1065    }
1066}
1067
1068impl Payload<'_> {
1069    /// If this `Payload` represents a section in the original wasm module then
1070    /// the section's id and range within the original wasm binary are returned.
1071    ///
1072    /// Not all payloads refer to entire sections, such as the `Version` and
1073    /// `CodeSectionEntry` variants. These variants will return `None` from this
1074    /// function.
1075    ///
1076    /// Otherwise this function will return `Some` where the first element is
1077    /// the byte identifier for the section and the second element is the range
1078    /// of the contents of the section within the original wasm binary.
1079    ///
1080    /// The purpose of this method is to enable tools to easily iterate over
1081    /// entire sections if necessary and handle sections uniformly, for example
1082    /// dropping custom sections while preserving all other sections.
1083    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1084        use Payload::*;
1085
1086        match self {
1087            Version { .. } => None,
1088            TypeSection(s) => Some((TYPE_SECTION, s.range())),
1089            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1090            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1091            TableSection(s) => Some((TABLE_SECTION, s.range())),
1092            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1093            TagSection(s) => Some((TAG_SECTION, s.range())),
1094            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1095            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1096            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1097            DataSection(s) => Some((DATA_SECTION, s.range())),
1098            StartSection { range, .. } => Some((START_SECTION, range.clone())),
1099            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1100            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1101            CodeSectionEntry(_) => None,
1102
1103            ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1104            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1105            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1106            ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
1107            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1108            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1109            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1110            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1111            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1112            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1113            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1114
1115            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1116
1117            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1118
1119            End(_) => None,
1120        }
1121    }
1122}
1123
1124impl fmt::Debug for Payload<'_> {
1125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126        use Payload::*;
1127        match self {
1128            Version {
1129                num,
1130                encoding,
1131                range,
1132            } => f
1133                .debug_struct("Version")
1134                .field("num", num)
1135                .field("encoding", encoding)
1136                .field("range", range)
1137                .finish(),
1138
1139            // Module sections
1140            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1141            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1142            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1143            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1144            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1145            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1146            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1147            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1148            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1149            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1150            StartSection { func, range } => f
1151                .debug_struct("StartSection")
1152                .field("func", func)
1153                .field("range", range)
1154                .finish(),
1155            DataCountSection { count, range } => f
1156                .debug_struct("DataCountSection")
1157                .field("count", count)
1158                .field("range", range)
1159                .finish(),
1160            CodeSectionStart { count, range, size } => f
1161                .debug_struct("CodeSectionStart")
1162                .field("count", count)
1163                .field("range", range)
1164                .field("size", size)
1165                .finish(),
1166            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1167
1168            // Component sections
1169            ModuleSection { parser: _, range } => f
1170                .debug_struct("ModuleSection")
1171                .field("range", range)
1172                .finish(),
1173            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1174            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1175            ComponentSection { parser: _, range } => f
1176                .debug_struct("ComponentSection")
1177                .field("range", range)
1178                .finish(),
1179            ComponentInstanceSection(_) => f
1180                .debug_tuple("ComponentInstanceSection")
1181                .field(&"...")
1182                .finish(),
1183            ComponentAliasSection(_) => f
1184                .debug_tuple("ComponentAliasSection")
1185                .field(&"...")
1186                .finish(),
1187            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1188            ComponentCanonicalSection(_) => f
1189                .debug_tuple("ComponentCanonicalSection")
1190                .field(&"...")
1191                .finish(),
1192            ComponentStartSection { .. } => f
1193                .debug_tuple("ComponentStartSection")
1194                .field(&"...")
1195                .finish(),
1196            ComponentImportSection(_) => f
1197                .debug_tuple("ComponentImportSection")
1198                .field(&"...")
1199                .finish(),
1200            ComponentExportSection(_) => f
1201                .debug_tuple("ComponentExportSection")
1202                .field(&"...")
1203                .finish(),
1204
1205            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1206
1207            UnknownSection { id, range, .. } => f
1208                .debug_struct("UnknownSection")
1209                .field("id", id)
1210                .field("range", range)
1211                .finish(),
1212
1213            End(offset) => f.debug_tuple("End").field(offset).finish(),
1214        }
1215    }
1216}
1217
1218fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1219    err.inner.needed_hint = None;
1220    err
1221}
1222
1223#[cfg(test)]
1224mod tests {
1225    use super::*;
1226
1227    macro_rules! assert_matches {
1228        ($a:expr, $b:pat $(,)?) => {
1229            match $a {
1230                $b => {}
1231                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1232            }
1233        };
1234    }
1235
1236    #[test]
1237    fn header() {
1238        assert!(Parser::default().parse(&[], true).is_err());
1239        assert_matches!(
1240            Parser::default().parse(&[], false),
1241            Ok(Chunk::NeedMoreData(4)),
1242        );
1243        assert_matches!(
1244            Parser::default().parse(b"\0", false),
1245            Ok(Chunk::NeedMoreData(3)),
1246        );
1247        assert_matches!(
1248            Parser::default().parse(b"\0asm", false),
1249            Ok(Chunk::NeedMoreData(4)),
1250        );
1251        assert_matches!(
1252            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1253            Ok(Chunk::Parsed {
1254                consumed: 8,
1255                payload: Payload::Version { num: 1, .. },
1256            }),
1257        );
1258    }
1259
1260    #[test]
1261    fn header_iter() {
1262        for _ in Parser::default().parse_all(&[]) {}
1263        for _ in Parser::default().parse_all(b"\0") {}
1264        for _ in Parser::default().parse_all(b"\0asm") {}
1265        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1266    }
1267
1268    fn parser_after_header() -> Parser {
1269        let mut p = Parser::default();
1270        assert_matches!(
1271            p.parse(b"\0asm\x01\0\0\0", false),
1272            Ok(Chunk::Parsed {
1273                consumed: 8,
1274                payload: Payload::Version {
1275                    num: WASM_MODULE_VERSION,
1276                    encoding: Encoding::Module,
1277                    ..
1278                },
1279            }),
1280        );
1281        p
1282    }
1283
1284    fn parser_after_component_header() -> Parser {
1285        let mut p = Parser::default();
1286        assert_matches!(
1287            p.parse(b"\0asm\x0d\0\x01\0", false),
1288            Ok(Chunk::Parsed {
1289                consumed: 8,
1290                payload: Payload::Version {
1291                    num: WASM_COMPONENT_VERSION,
1292                    encoding: Encoding::Component,
1293                    ..
1294                },
1295            }),
1296        );
1297        p
1298    }
1299
1300    #[test]
1301    fn start_section() {
1302        assert_matches!(
1303            parser_after_header().parse(&[], false),
1304            Ok(Chunk::NeedMoreData(1)),
1305        );
1306        assert!(parser_after_header().parse(&[8], true).is_err());
1307        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1308        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1309        assert_matches!(
1310            parser_after_header().parse(&[8], false),
1311            Ok(Chunk::NeedMoreData(1)),
1312        );
1313        assert_matches!(
1314            parser_after_header().parse(&[8, 1], false),
1315            Ok(Chunk::NeedMoreData(1)),
1316        );
1317        assert_matches!(
1318            parser_after_header().parse(&[8, 2], false),
1319            Ok(Chunk::NeedMoreData(2)),
1320        );
1321        assert_matches!(
1322            parser_after_header().parse(&[8, 1, 1], false),
1323            Ok(Chunk::Parsed {
1324                consumed: 3,
1325                payload: Payload::StartSection { func: 1, .. },
1326            }),
1327        );
1328        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1329        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1330    }
1331
1332    #[test]
1333    fn end_works() {
1334        assert_matches!(
1335            parser_after_header().parse(&[], true),
1336            Ok(Chunk::Parsed {
1337                consumed: 0,
1338                payload: Payload::End(8),
1339            }),
1340        );
1341    }
1342
1343    #[test]
1344    fn type_section() {
1345        assert!(parser_after_header().parse(&[1], true).is_err());
1346        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1347        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1348        assert_matches!(
1349            parser_after_header().parse(&[1], false),
1350            Ok(Chunk::NeedMoreData(1)),
1351        );
1352        assert_matches!(
1353            parser_after_header().parse(&[1, 1], false),
1354            Ok(Chunk::NeedMoreData(1)),
1355        );
1356        assert_matches!(
1357            parser_after_header().parse(&[1, 1, 1], false),
1358            Ok(Chunk::Parsed {
1359                consumed: 3,
1360                payload: Payload::TypeSection(_),
1361            }),
1362        );
1363        assert_matches!(
1364            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1365            Ok(Chunk::Parsed {
1366                consumed: 3,
1367                payload: Payload::TypeSection(_),
1368            }),
1369        );
1370    }
1371
1372    #[test]
1373    fn custom_section() {
1374        assert!(parser_after_header().parse(&[0], true).is_err());
1375        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1376        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1377        assert_matches!(
1378            parser_after_header().parse(&[0, 2, 1], false),
1379            Ok(Chunk::NeedMoreData(1)),
1380        );
1381        assert_matches!(
1382            parser_after_header().parse(&[0, 1, 0], false),
1383            Ok(Chunk::Parsed {
1384                consumed: 3,
1385                payload: Payload::CustomSection(CustomSectionReader {
1386                    name: "",
1387                    data_offset: 11,
1388                    data: b"",
1389                    range: Range { start: 10, end: 11 },
1390                }),
1391            }),
1392        );
1393        assert_matches!(
1394            parser_after_header().parse(&[0, 2, 1, b'a'], false),
1395            Ok(Chunk::Parsed {
1396                consumed: 4,
1397                payload: Payload::CustomSection(CustomSectionReader {
1398                    name: "a",
1399                    data_offset: 12,
1400                    data: b"",
1401                    range: Range { start: 10, end: 12 },
1402                }),
1403            }),
1404        );
1405        assert_matches!(
1406            parser_after_header().parse(&[0, 2, 0, b'a'], false),
1407            Ok(Chunk::Parsed {
1408                consumed: 4,
1409                payload: Payload::CustomSection(CustomSectionReader {
1410                    name: "",
1411                    data_offset: 11,
1412                    data: b"a",
1413                    range: Range { start: 10, end: 12 },
1414                }),
1415            }),
1416        );
1417    }
1418
1419    #[test]
1420    fn function_section() {
1421        assert!(parser_after_header().parse(&[10], true).is_err());
1422        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1423        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1424        assert_matches!(
1425            parser_after_header().parse(&[10], false),
1426            Ok(Chunk::NeedMoreData(1))
1427        );
1428        assert_matches!(
1429            parser_after_header().parse(&[10, 1], false),
1430            Ok(Chunk::NeedMoreData(1))
1431        );
1432        let mut p = parser_after_header();
1433        assert_matches!(
1434            p.parse(&[10, 1, 0], false),
1435            Ok(Chunk::Parsed {
1436                consumed: 3,
1437                payload: Payload::CodeSectionStart { count: 0, .. },
1438            }),
1439        );
1440        assert_matches!(
1441            p.parse(&[], true),
1442            Ok(Chunk::Parsed {
1443                consumed: 0,
1444                payload: Payload::End(11),
1445            }),
1446        );
1447        let mut p = parser_after_header();
1448        assert_matches!(
1449            p.parse(&[10, 2, 1, 0], false),
1450            Ok(Chunk::Parsed {
1451                consumed: 3,
1452                payload: Payload::CodeSectionStart { count: 1, .. },
1453            }),
1454        );
1455        assert_matches!(
1456            p.parse(&[0], false),
1457            Ok(Chunk::Parsed {
1458                consumed: 1,
1459                payload: Payload::CodeSectionEntry(_),
1460            }),
1461        );
1462        assert_matches!(
1463            p.parse(&[], true),
1464            Ok(Chunk::Parsed {
1465                consumed: 0,
1466                payload: Payload::End(12),
1467            }),
1468        );
1469
1470        // 1 byte section with 1 function can't read the function body because
1471        // the section is too small
1472        let mut p = parser_after_header();
1473        assert_matches!(
1474            p.parse(&[10, 1, 1], false),
1475            Ok(Chunk::Parsed {
1476                consumed: 3,
1477                payload: Payload::CodeSectionStart { count: 1, .. },
1478            }),
1479        );
1480        assert_eq!(
1481            p.parse(&[0], false).unwrap_err().message(),
1482            "unexpected end-of-file"
1483        );
1484
1485        // section with 2 functions but section is cut off
1486        let mut p = parser_after_header();
1487        assert_matches!(
1488            p.parse(&[10, 2, 2], false),
1489            Ok(Chunk::Parsed {
1490                consumed: 3,
1491                payload: Payload::CodeSectionStart { count: 2, .. },
1492            }),
1493        );
1494        assert_matches!(
1495            p.parse(&[0], false),
1496            Ok(Chunk::Parsed {
1497                consumed: 1,
1498                payload: Payload::CodeSectionEntry(_),
1499            }),
1500        );
1501        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1502        assert_eq!(
1503            p.parse(&[0], false).unwrap_err().message(),
1504            "unexpected end-of-file",
1505        );
1506
1507        // trailing data is bad
1508        let mut p = parser_after_header();
1509        assert_matches!(
1510            p.parse(&[10, 3, 1], false),
1511            Ok(Chunk::Parsed {
1512                consumed: 3,
1513                payload: Payload::CodeSectionStart { count: 1, .. },
1514            }),
1515        );
1516        assert_matches!(
1517            p.parse(&[0], false),
1518            Ok(Chunk::Parsed {
1519                consumed: 1,
1520                payload: Payload::CodeSectionEntry(_),
1521            }),
1522        );
1523        assert_eq!(
1524            p.parse(&[0], false).unwrap_err().message(),
1525            "trailing bytes at end of section",
1526        );
1527    }
1528
1529    #[test]
1530    fn single_module() {
1531        let mut p = parser_after_component_header();
1532        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1533
1534        // A module that's 8 bytes in length
1535        let mut sub = match p.parse(&[1, 8], false) {
1536            Ok(Chunk::Parsed {
1537                consumed: 2,
1538                payload: Payload::ModuleSection { parser, .. },
1539            }) => parser,
1540            other => panic!("bad parse {:?}", other),
1541        };
1542
1543        // Parse the header of the submodule with the sub-parser.
1544        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1545        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1546        assert_matches!(
1547            sub.parse(b"\0asm\x01\0\0\0", false),
1548            Ok(Chunk::Parsed {
1549                consumed: 8,
1550                payload: Payload::Version {
1551                    num: 1,
1552                    encoding: Encoding::Module,
1553                    ..
1554                },
1555            }),
1556        );
1557
1558        // The sub-parser should be byte-limited so the next byte shouldn't get
1559        // consumed, it's intended for the parent parser.
1560        assert_matches!(
1561            sub.parse(&[10], false),
1562            Ok(Chunk::Parsed {
1563                consumed: 0,
1564                payload: Payload::End(18),
1565            }),
1566        );
1567
1568        // The parent parser should now be back to resuming, and we simulate it
1569        // being done with bytes to ensure that it's safely at the end,
1570        // completing the module code section.
1571        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1572        assert_matches!(
1573            p.parse(&[], true),
1574            Ok(Chunk::Parsed {
1575                consumed: 0,
1576                payload: Payload::End(18),
1577            }),
1578        );
1579    }
1580
1581    #[test]
1582    fn nested_section_too_big() {
1583        let mut p = parser_after_component_header();
1584
1585        // A module that's 10 bytes in length
1586        let mut sub = match p.parse(&[1, 10], false) {
1587            Ok(Chunk::Parsed {
1588                consumed: 2,
1589                payload: Payload::ModuleSection { parser, .. },
1590            }) => parser,
1591            other => panic!("bad parse {:?}", other),
1592        };
1593
1594        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1595        // module.
1596        assert_matches!(
1597            sub.parse(b"\0asm\x01\0\0\0", false),
1598            Ok(Chunk::Parsed {
1599                consumed: 8,
1600                payload: Payload::Version { num: 1, .. },
1601            }),
1602        );
1603
1604        // We can't parse a section which declares its bigger than the outer
1605        // module. This is a custom section, one byte big, with one content byte. The
1606        // content byte, however, lives outside of the parent's module code
1607        // section.
1608        assert_eq!(
1609            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1610            "section too large",
1611        );
1612    }
1613}
// tinywasm_wasmparser/parser.rs