// wasmparser_nostd/parser.rs

1use crate::CoreTypeSectionReader;
2use crate::{
3    limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
4    ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
5    ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
6    ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
7    GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
8    SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
9};
10use ::alloc::vec::Vec;
11use ::core::convert::TryInto;
12use ::core::fmt;
13use ::core::iter;
14use ::core::ops::Range;
15
/// Binary format version number of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 1;
17
// Binary format version number of a WebAssembly component.
//
// The counter began at `0xa` and is incremented from there; once the
// component model is stabilized it will become `0x1`. History so far:
//
// * [????-??-??] 0xa - original version
// * [2022-01-05] 0xb - `export` introduces an alias
// * [2022-02-06] 0xc - `export` has an optional type ascribed to it
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xc;
25
/// Binary encoding formats that the parser understands.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is encoded as a WebAssembly module.
    Module,
    /// The binary is encoded as a WebAssembly component.
    Component,
}
34
35/// An incremental parser of a binary WebAssembly module or component.
36///
37/// This type is intended to be used to incrementally parse a WebAssembly module
38/// or component as bytes become available for the module. This can also be used
39/// to parse modules or components that are already entirely resident within memory.
40///
41/// This primary function for a parser is the [`Parser::parse`] function which
42/// will incrementally consume input. You can also use the [`Parser::parse_all`]
43/// function to parse a module or component that is entirely resident in memory.
44#[derive(Debug, Clone)]
45pub struct Parser {
46    state: State,
47    offset: u64,
48    max_size: u64,
49    encoding: Encoding,
50}
51
/// Internal position of a [`Parser`] within the binary being parsed.
#[derive(Clone, Debug)]
enum State {
    /// The header has not been read yet. This is the state a parser is
    /// created in.
    Header,
    /// The parser sits at a section boundary; the next thing read is a
    /// section id followed by the section length.
    SectionStart,
    /// The parser is inside a code section.
    FunctionBody {
        /// Initialized with the function count announced by the code section.
        remaining: u32,
        /// Initialized with the byte size of the section contents that
        /// follow the function count.
        len: u32,
    },
}
58
59/// A successful return payload from [`Parser::parse`].
60///
61/// On success one of two possible values can be returned, either that more data
62/// is needed to continue parsing or a chunk of the input was parsed, indicating
63/// how much of it was parsed.
64#[derive(Debug)]
65pub enum Chunk<'a> {
66    /// This can be returned at any time and indicates that more data is needed
67    /// to proceed with parsing. Zero bytes were consumed from the input to
68    /// [`Parser::parse`]. The `usize` value here is a hint as to how many more
69    /// bytes are needed to continue parsing.
70    NeedMoreData(u64),
71
72    /// A chunk was successfully parsed.
73    Parsed {
74        /// This many bytes of the `data` input to [`Parser::parse`] were
75        /// consumed to produce `payload`.
76        consumed: usize,
77        /// The value that we actually parsed.
78        payload: Payload<'a>,
79    },
80}
81
82/// Values that can be parsed from a WebAssembly module or component.
83///
84/// This enumeration is all possible chunks of pieces that can be parsed by a
85/// [`Parser`] from a binary WebAssembly module or component. Note that for many
86/// sections the entire section is parsed all at once, whereas other functions,
87/// like the code section, are parsed incrementally. This is a distinction where some
88/// sections, like the type section, are required to be fully resident in memory
89/// (fully downloaded) before proceeding. Other sections, like the code section,
90/// can be processed in a streaming fashion where each function is extracted
91/// individually so it can possibly be shipped to another thread while you wait
92/// for more functions to get downloaded.
93///
94/// Note that payloads, when returned, do not indicate that the module or component
95/// is valid. For example when you receive a `Payload::TypeSection` the type
96/// section itself has not yet actually been parsed. The reader returned will be
97/// able to parse it, but you'll have to actually iterate the reader to do the
98/// full parse. Each payload returned is intended to be a *window* into the
99/// original `data` passed to [`Parser::parse`] which can be further processed
100/// if necessary.
101pub enum Payload<'a> {
102    /// Indicates the header of a WebAssembly module or component.
103    Version {
104        /// The version number found in the header.
105        num: u16,
106        /// The encoding format being parsed.
107        encoding: Encoding,
108        /// The range of bytes that were parsed to consume the header of the
109        /// module or component. Note that this range is relative to the start
110        /// of the byte stream.
111        range: Range<usize>,
112    },
113
114    /// A module type section was received and the provided reader can be
115    /// used to parse the contents of the type section.
116    TypeSection(TypeSectionReader<'a>),
117    /// A module import section was received and the provided reader can be
118    /// used to parse the contents of the import section.
119    ImportSection(ImportSectionReader<'a>),
120    /// A module function section was received and the provided reader can be
121    /// used to parse the contents of the function section.
122    FunctionSection(FunctionSectionReader<'a>),
123    /// A module table section was received and the provided reader can be
124    /// used to parse the contents of the table section.
125    TableSection(TableSectionReader<'a>),
126    /// A module memory section was received and the provided reader can be
127    /// used to parse the contents of the memory section.
128    MemorySection(MemorySectionReader<'a>),
129    /// A module tag section was received, and the provided reader can be
130    /// used to parse the contents of the tag section.
131    TagSection(TagSectionReader<'a>),
132    /// A module global section was received and the provided reader can be
133    /// used to parse the contents of the global section.
134    GlobalSection(GlobalSectionReader<'a>),
135    /// A module export section was received, and the provided reader can be
136    /// used to parse the contents of the export section.
137    ExportSection(ExportSectionReader<'a>),
138    /// A module start section was received.
139    StartSection {
140        /// The start function index
141        func: u32,
142        /// The range of bytes that specify the `func` field, specified in
143        /// offsets relative to the start of the byte stream.
144        range: Range<usize>,
145    },
146    /// A module element section was received and the provided reader can be
147    /// used to parse the contents of the element section.
148    ElementSection(ElementSectionReader<'a>),
149    /// A module data count section was received.
150    DataCountSection {
151        /// The number of data segments.
152        count: u32,
153        /// The range of bytes that specify the `count` field, specified in
154        /// offsets relative to the start of the byte stream.
155        range: Range<usize>,
156    },
157    /// A module data section was received and the provided reader can be
158    /// used to parse the contents of the data section.
159    DataSection(DataSectionReader<'a>),
160    /// Indicator of the start of the code section of a WebAssembly module.
161    ///
162    /// This entry is returned whenever the code section starts. The `count`
163    /// field indicates how many entries are in this code section. After
164    /// receiving this start marker you're guaranteed that the next `count`
165    /// items will be either `CodeSectionEntry` or an error will be returned.
166    ///
167    /// This, unlike other sections, is intended to be used for streaming the
168    /// contents of the code section. The code section is not required to be
169    /// fully resident in memory when we parse it. Instead a [`Parser`] is
170    /// capable of parsing piece-by-piece of a code section.
171    CodeSectionStart {
172        /// The number of functions in this section.
173        count: u32,
174        /// The range of bytes that represent this section, specified in
175        /// offsets relative to the start of the byte stream.
176        range: Range<usize>,
177        /// The size, in bytes, of the remaining contents of this section.
178        ///
179        /// This can be used in combination with [`Parser::skip_section`]
180        /// where the caller will know how many bytes to skip before feeding
181        /// bytes into `Parser` again.
182        size: u32,
183    },
184    /// An entry of the code section, a function, was parsed from a WebAssembly
185    /// module.
186    ///
187    /// This entry indicates that a function was successfully received from the
188    /// code section, and the payload here is the window into the original input
189    /// where the function resides. Note that the function itself has not been
190    /// parsed, it's only been outlined. You'll need to process the
191    /// `FunctionBody` provided to test whether it parses and/or is valid.
192    CodeSectionEntry(FunctionBody<'a>),
193
194    /// A core module section was received and the provided parser can be
195    /// used to parse the nested module.
196    ///
197    /// This variant is special in that it returns a sub-`Parser`. Upon
198    /// receiving a `ModuleSection` it is expected that the returned
199    /// `Parser` will be used instead of the parent `Parser` until the parse has
200    /// finished. You'll need to feed data into the `Parser` returned until it
201    /// returns `Payload::End`. After that you'll switch back to the parent
202    /// parser to resume parsing the rest of the current component.
203    ///
204    /// Note that binaries will not be parsed correctly if you feed the data for
205    /// a nested module into the parent [`Parser`].
206    ModuleSection {
207        /// The parser for the nested module.
208        parser: Parser,
209        /// The range of bytes that represent the nested module in the
210        /// original byte stream.
211        range: Range<usize>,
212    },
213    /// A core instance section was received and the provided parser can be
214    /// used to parse the contents of the core instance section.
215    ///
216    /// Currently this section is only parsed in a component.
217    InstanceSection(InstanceSectionReader<'a>),
218    /// A core type section was received and the provided parser can be
219    /// used to parse the contents of the core type section.
220    ///
221    /// Currently this section is only parsed in a component.
222    CoreTypeSection(CoreTypeSectionReader<'a>),
223    /// A component section from a WebAssembly component was received and the
224    /// provided parser can be used to parse the nested component.
225    ///
226    /// This variant is special in that it returns a sub-`Parser`. Upon
227    /// receiving a `ComponentSection` it is expected that the returned
228    /// `Parser` will be used instead of the parent `Parser` until the parse has
229    /// finished. You'll need to feed data into the `Parser` returned until it
230    /// returns `Payload::End`. After that you'll switch back to the parent
231    /// parser to resume parsing the rest of the current component.
232    ///
233    /// Note that binaries will not be parsed correctly if you feed the data for
234    /// a nested component into the parent [`Parser`].
235    ComponentSection {
236        /// The parser for the nested component.
237        parser: Parser,
238        /// The range of bytes that represent the nested component in the
239        /// original byte stream.
240        range: Range<usize>,
241    },
242    /// A component instance section was received and the provided reader can be
243    /// used to parse the contents of the component instance section.
244    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
245    /// A component alias section was received and the provided reader can be
246    /// used to parse the contents of the component alias section.
247    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
248    /// A component type section was received and the provided reader can be
249    /// used to parse the contents of the component type section.
250    ComponentTypeSection(ComponentTypeSectionReader<'a>),
251    /// A component canonical section was received and the provided reader can be
252    /// used to parse the contents of the component canonical section.
253    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
254    /// A component start section was received.
255    ComponentStartSection {
256        /// The start function description.
257        start: ComponentStartFunction,
258        /// The range of bytes that specify the `start` field.
259        range: Range<usize>,
260    },
261    /// A component import section was received and the provided reader can be
262    /// used to parse the contents of the component import section.
263    ComponentImportSection(ComponentImportSectionReader<'a>),
264    /// A component export section was received, and the provided reader can be
265    /// used to parse the contents of the component export section.
266    ComponentExportSection(ComponentExportSectionReader<'a>),
267
268    /// A module or component custom section was received.
269    CustomSection(CustomSectionReader<'a>),
270
271    /// An unknown section was found.
272    ///
273    /// This variant is returned for all unknown sections encountered. This
274    /// likely wants to be interpreted as an error by consumers of the parser,
275    /// but this can also be used to parse sections currently unsupported by
276    /// the parser.
277    UnknownSection {
278        /// The 8-bit identifier for this section.
279        id: u8,
280        /// The contents of this section.
281        contents: &'a [u8],
282        /// The range of bytes, relative to the start of the original data
283        /// stream, that the contents of this section reside in.
284        range: Range<usize>,
285    },
286
287    /// The end of the WebAssembly module or component was reached.
288    ///
289    /// The value is the offset in the input byte stream where the end
290    /// was reached.
291    End(usize),
292}
293
// Section ids of a core WebAssembly module. The custom section id (0) is
// accepted for components as well.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;
308
// Section ids of a WebAssembly component. These overlap numerically with the
// module section ids, so an id is always interpreted together with the
// encoding announced by the header.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
320
321impl Parser {
322    /// Creates a new parser.
323    ///
324    /// Reports errors and ranges relative to `offset` provided, where `offset`
325    /// is some logical offset within the input stream that we're parsing.
326    pub fn new(offset: u64) -> Parser {
327        Parser {
328            state: State::Header,
329            offset,
330            max_size: u64::MAX,
331            // Assume the encoding is a module until we know otherwise
332            encoding: Encoding::Module,
333        }
334    }
335
336    /// Attempts to parse a chunk of data.
337    ///
338    /// This method will attempt to parse the next incremental portion of a
339    /// WebAssembly binary. Data available for the module or component is
340    /// provided as `data`, and the data can be incomplete if more data has yet
341    /// to arrive. The `eof` flag indicates whether more data will ever be received.
342    ///
343    /// There are two ways parsing can succeed with this method:
344    ///
345    /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes
346    ///   in `data` to parse a payload. The caller needs to wait for more data to
347    ///   be available in this situation before calling this method again. It is
348    ///   guaranteed that this is only returned if `eof` is `false`.
349    ///
350    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
351    ///   successfully parsed. The payload is available in this variant of what
352    ///   was parsed, and this also indicates how many bytes of `data` was
353    ///   consumed. It's expected that the caller will not provide these bytes
354    ///   back to the [`Parser`] again.
355    ///
356    /// Note that all `Chunk` return values are connected, with a lifetime, to
357    /// the input buffer. Each parsed chunk borrows the input buffer and is a
358    /// view into it for successfully parsed chunks.
359    ///
360    /// It is expected that you'll call this method until `Payload::End` is
361    /// reached, at which point you're guaranteed that the parse has completed.
362    /// Note that complete parsing, for the top-level module or component,
363    /// implies that `data` is empty and `eof` is `true`.
364    ///
365    /// # Errors
366    ///
367    /// Parse errors are returned as an `Err`. Errors can happen when the
368    /// structure of the data is unexpected or if sections are too large for
369    /// example. Note that errors are not returned for malformed *contents* of
370    /// sections here. Sections are generally not individually parsed and each
371    /// returned [`Payload`] needs to be iterated over further to detect all
372    /// errors.
373    ///
374    /// # Examples
375    ///
376    /// An example of reading a wasm file from a stream (`std::io::Read`) and
377    /// incrementally parsing it.
378    ///
379    /// ```
380    /// use std::io::Read;
381    /// use anyhow::Result;
382    /// use wasmparser_nostd::{Parser, Chunk, Payload::*};
383    ///
384    /// fn parse(mut reader: impl Read) -> Result<()> {
385    ///     let mut buf = Vec::new();
386    ///     let mut parser = Parser::new(0);
387    ///     let mut eof = false;
388    ///     let mut stack = Vec::new();
389    ///
390    ///     loop {
391    ///         let (payload, consumed) = match parser.parse(&buf, eof)? {
392    ///             Chunk::NeedMoreData(hint) => {
393    ///                 assert!(!eof); // otherwise an error would be returned
394    ///
395    ///                 // Use the hint to preallocate more space, then read
396    ///                 // some more data into our buffer.
397    ///                 //
398    ///                 // Note that the buffer management here is not ideal,
399    ///                 // but it's compact enough to fit in an example!
400    ///                 let len = buf.len();
401    ///                 buf.extend((0..hint).map(|_| 0u8));
402    ///                 let n = reader.read(&mut buf[len..])?;
403    ///                 buf.truncate(len + n);
404    ///                 eof = n == 0;
405    ///                 continue;
406    ///             }
407    ///
408    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
409    ///         };
410    ///
411    ///         match payload {
412    ///             // Sections for WebAssembly modules
413    ///             Version { .. } => { /* ... */ }
414    ///             TypeSection(_) => { /* ... */ }
415    ///             ImportSection(_) => { /* ... */ }
416    ///             FunctionSection(_) => { /* ... */ }
417    ///             TableSection(_) => { /* ... */ }
418    ///             MemorySection(_) => { /* ... */ }
419    ///             TagSection(_) => { /* ... */ }
420    ///             GlobalSection(_) => { /* ... */ }
421    ///             ExportSection(_) => { /* ... */ }
422    ///             StartSection { .. } => { /* ... */ }
423    ///             ElementSection(_) => { /* ... */ }
424    ///             DataCountSection { .. } => { /* ... */ }
425    ///             DataSection(_) => { /* ... */ }
426    ///
427    ///             // Here we know how many functions we'll be receiving as
428    ///             // `CodeSectionEntry`, so we can prepare for that, and
429    ///             // afterwards we can parse and handle each function
430    ///             // individually.
431    ///             CodeSectionStart { .. } => { /* ... */ }
432    ///             CodeSectionEntry(body) => {
433    ///                 // here we can iterate over `body` to parse the function
434    ///                 // and its locals
435    ///             }
436    ///
437    ///             // Sections for WebAssembly components
438    ///             ModuleSection { .. } => { /* ... */ }
439    ///             InstanceSection(_) => { /* ... */ }
440    ///             CoreTypeSection(_) => { /* ... */ }
441    ///             ComponentSection { .. } => { /* ... */ }
442    ///             ComponentInstanceSection(_) => { /* ... */ }
443    ///             ComponentAliasSection(_) => { /* ... */ }
444    ///             ComponentTypeSection(_) => { /* ... */ }
445    ///             ComponentCanonicalSection(_) => { /* ... */ }
446    ///             ComponentStartSection { .. } => { /* ... */ }
447    ///             ComponentImportSection(_) => { /* ... */ }
448    ///             ComponentExportSection(_) => { /* ... */ }
449    ///
450    ///             CustomSection(_) => { /* ... */ }
451    ///
452    ///             // most likely you'd return an error here
453    ///             UnknownSection { id, .. } => { /* ... */ }
454    ///
455    ///             // Once we've reached the end of a parser we either resume
456    ///             // at the parent parser or we break out of the loop because
457    ///             // we're done.
458    ///             End(_) => {
459    ///                 if let Some(parent_parser) = stack.pop() {
460    ///                     parser = parent_parser;
461    ///                 } else {
462    ///                     break;
463    ///                 }
464    ///             }
465    ///         }
466    ///
467    ///         // once we're done processing the payload we can forget the
468    ///         // original.
469    ///         buf.drain(..consumed);
470    ///     }
471    ///
472    ///     Ok(())
473    /// }
474    ///
475    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
476    /// ```
477    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
478        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
479            (&data[..(self.max_size as usize)], true)
480        } else {
481            (data, eof)
482        };
483        // TODO: thread through `offset: u64` to `BinaryReader`, remove
484        // the cast here.
485        let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
486        match self.parse_reader(&mut reader, eof) {
487            Ok(payload) => {
488                // Be sure to update our offset with how far we got in the
489                // reader
490                self.offset += usize_to_u64(reader.position);
491                self.max_size -= usize_to_u64(reader.position);
492                Ok(Chunk::Parsed {
493                    consumed: reader.position,
494                    payload,
495                })
496            }
497            Err(e) => {
498                // If we're at EOF then there's no way we can recover from any
499                // error, so continue to propagate it.
500                if eof {
501                    return Err(e);
502                }
503
504                // If our error doesn't look like it can be resolved with more
505                // data being pulled down, then propagate it, otherwise switch
506                // the error to "feed me please"
507                match e.inner.needed_hint {
508                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
509                    None => Err(e),
510                }
511            }
512        }
513    }
514
515    fn parse_reader<'a>(
516        &mut self,
517        reader: &mut BinaryReader<'a>,
518        eof: bool,
519    ) -> Result<Payload<'a>> {
520        use Payload::*;
521
522        match self.state {
523            State::Header => {
524                const KIND_MODULE: u16 = 0x00;
525                const KIND_COMPONENT: u16 = 0x01;
526
527                let start = reader.original_position();
528                let header_version = reader.read_header_version()?;
529                self.encoding = match (header_version >> 16) as u16 {
530                    KIND_MODULE => Encoding::Module,
531                    KIND_COMPONENT => Encoding::Component,
532                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
533                };
534                let num = header_version as u16;
535                self.state = State::SectionStart;
536                Ok(Version {
537                    num,
538                    encoding: self.encoding,
539                    range: start..reader.original_position(),
540                })
541            }
542            State::SectionStart => {
543                // If we're at eof and there are no bytes in our buffer, then
544                // that means we reached the end of the data since it's
545                // just a bunch of sections concatenated after the header.
546                if eof && reader.bytes_remaining() == 0 {
547                    return Ok(Payload::End(reader.original_position()));
548                }
549
550                let id_pos = reader.position;
551                let id = reader.read_u8()?;
552                if id & 0x80 != 0 {
553                    return Err(BinaryReaderError::new("malformed section id", id_pos));
554                }
555                let len_pos = reader.original_position();
556                let mut len = reader.read_var_u32()?;
557
558                // Test to make sure that this section actually fits within
559                // `Parser::max_size`. This doesn't matter for top-level modules
560                // but it is required for nested modules/components to correctly ensure
561                // that all sections live entirely within their section of the
562                // file.
563                let section_overflow = self
564                    .max_size
565                    .checked_sub(usize_to_u64(reader.position))
566                    .and_then(|s| s.checked_sub(len.into()))
567                    .is_none();
568                if section_overflow {
569                    return Err(BinaryReaderError::new("section too large", len_pos));
570                }
571
572                match (self.encoding, id) {
573                    // Sections for both modules and components.
574                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
575
576                    // Module sections
577                    (Encoding::Module, TYPE_SECTION) => {
578                        section(reader, len, TypeSectionReader::new, TypeSection)
579                    }
580                    (Encoding::Module, IMPORT_SECTION) => {
581                        section(reader, len, ImportSectionReader::new, ImportSection)
582                    }
583                    (Encoding::Module, FUNCTION_SECTION) => {
584                        section(reader, len, FunctionSectionReader::new, FunctionSection)
585                    }
586                    (Encoding::Module, TABLE_SECTION) => {
587                        section(reader, len, TableSectionReader::new, TableSection)
588                    }
589                    (Encoding::Module, MEMORY_SECTION) => {
590                        section(reader, len, MemorySectionReader::new, MemorySection)
591                    }
592                    (Encoding::Module, GLOBAL_SECTION) => {
593                        section(reader, len, GlobalSectionReader::new, GlobalSection)
594                    }
595                    (Encoding::Module, EXPORT_SECTION) => {
596                        section(reader, len, ExportSectionReader::new, ExportSection)
597                    }
598                    (Encoding::Module, START_SECTION) => {
599                        let (func, range) = single_item(reader, len, "start")?;
600                        Ok(StartSection { func, range })
601                    }
602                    (Encoding::Module, ELEMENT_SECTION) => {
603                        section(reader, len, ElementSectionReader::new, ElementSection)
604                    }
605                    (Encoding::Module, CODE_SECTION) => {
606                        let start = reader.original_position();
607                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
608                        let range = start..reader.original_position() + len as usize;
609                        self.state = State::FunctionBody {
610                            remaining: count,
611                            len,
612                        };
613                        Ok(CodeSectionStart {
614                            count,
615                            range,
616                            size: len,
617                        })
618                    }
619                    (Encoding::Module, DATA_SECTION) => {
620                        section(reader, len, DataSectionReader::new, DataSection)
621                    }
622                    (Encoding::Module, DATA_COUNT_SECTION) => {
623                        let (count, range) = single_item(reader, len, "data count")?;
624                        Ok(DataCountSection { count, range })
625                    }
626                    (Encoding::Module, TAG_SECTION) => {
627                        section(reader, len, TagSectionReader::new, TagSection)
628                    }
629
630                    // Component sections
631                    (Encoding::Component, COMPONENT_MODULE_SECTION)
632                    | (Encoding::Component, COMPONENT_SECTION) => {
633                        if len as usize > MAX_WASM_MODULE_SIZE {
634                            bail!(
635                                len_pos,
636                                "{} section is too large",
637                                if id == 1 { "module" } else { "component " }
638                            );
639                        }
640
641                        let range =
642                            reader.original_position()..reader.original_position() + len as usize;
643                        self.max_size -= u64::from(len);
644                        self.offset += u64::from(len);
645                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
646                        parser.max_size = len.into();
647
648                        Ok(match id {
649                            1 => ModuleSection { parser, range },
650                            4 => ComponentSection { parser, range },
651                            _ => unreachable!(),
652                        })
653                    }
654                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
655                        section(reader, len, InstanceSectionReader::new, InstanceSection)
656                    }
657                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
658                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
659                    }
660                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
661                        reader,
662                        len,
663                        ComponentInstanceSectionReader::new,
664                        ComponentInstanceSection,
665                    ),
666                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
667                        section(reader, len, SectionLimited::new, ComponentAliasSection)
668                    }
669                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
670                        reader,
671                        len,
672                        ComponentTypeSectionReader::new,
673                        ComponentTypeSection,
674                    ),
675                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
676                        reader,
677                        len,
678                        ComponentCanonicalSectionReader::new,
679                        ComponentCanonicalSection,
680                    ),
681                    (Encoding::Component, COMPONENT_START_SECTION) => {
682                        let (start, range) = single_item(reader, len, "component start")?;
683                        Ok(ComponentStartSection { start, range })
684                    }
685                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
686                        reader,
687                        len,
688                        ComponentImportSectionReader::new,
689                        ComponentImportSection,
690                    ),
691                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
692                        reader,
693                        len,
694                        ComponentExportSectionReader::new,
695                        ComponentExportSection,
696                    ),
697                    (_, id) => {
698                        let offset = reader.original_position();
699                        let contents = reader.read_bytes(len as usize)?;
700                        let range = offset..offset + len as usize;
701                        Ok(UnknownSection {
702                            id,
703                            contents,
704                            range,
705                        })
706                    }
707                }
708            }
709
710            // Once we hit 0 remaining incrementally parsed items, with 0
711            // remaining bytes in each section, we're done and can switch back
712            // to parsing sections.
713            State::FunctionBody {
714                remaining: 0,
715                len: 0,
716            } => {
717                self.state = State::SectionStart;
718                self.parse_reader(reader, eof)
719            }
720
721            // ... otherwise trailing bytes with no remaining entries in these
722            // sections indicates an error.
723            State::FunctionBody { remaining: 0, len } => {
724                debug_assert!(len > 0);
725                let offset = reader.original_position();
726                Err(BinaryReaderError::new(
727                    "trailing bytes at end of section",
728                    offset,
729                ))
730            }
731
732            // Functions are relatively easy to parse when we know there's at
733            // least one remaining and at least one byte available to read
734            // things.
735            //
            // We use the remaining length to try to read a u32 size of the
            // function, and using that size we require the entire function to
            // be resident in memory. This means that we're reading whole
            // chunks of functions at a time.
740            //
741            // Limiting via `Parser::max_size` (nested parsing) happens above in
742            // `fn parse`, and limiting by our section size happens via
743            // `delimited`. Actual parsing of the function body is delegated to
744            // the caller to iterate over the `FunctionBody` structure.
745            State::FunctionBody { remaining, mut len } => {
746                let body = delimited(reader, &mut len, |r| {
747                    let size = r.read_var_u32()?;
748                    let offset = r.original_position();
749                    Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
750                })?;
751                self.state = State::FunctionBody {
752                    remaining: remaining - 1,
753                    len,
754                };
755                Ok(CodeSectionEntry(body))
756            }
757        }
758    }
759
760    /// Convenience function that can be used to parse a module or component
761    /// that is entirely resident in memory.
762    ///
763    /// This function will parse the `data` provided as a WebAssembly module
764    /// or component.
765    ///
766    /// Note that when this function yields sections that provide parsers,
767    /// no further action is required for those sections as payloads from
768    /// those parsers will be automatically returned.
769    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
770        let mut stack = Vec::new();
771        let mut cur = self;
772        let mut done = false;
773        iter::from_fn(move || {
774            if done {
775                return None;
776            }
777            let payload = match cur.parse(data, true) {
778                // Propagate all errors
779                Err(e) => {
780                    done = true;
781                    return Some(Err(e));
782                }
783
784                // This isn't possible because `eof` is always true.
785                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
786
787                Ok(Chunk::Parsed { payload, consumed }) => {
788                    data = &data[consumed..];
789                    payload
790                }
791            };
792
793            match &payload {
794                Payload::ModuleSection { parser, .. }
795                | Payload::ComponentSection { parser, .. } => {
796                    stack.push(cur.clone());
797                    cur = parser.clone();
798                }
799                Payload::End(_) => match stack.pop() {
800                    Some(p) => cur = p,
801                    None => done = true,
802                },
803
804                _ => {}
805            }
806
807            Some(Ok(payload))
808        })
809    }
810
811    /// Skip parsing the code section entirely.
812    ///
813    /// This function can be used to indicate, after receiving
814    /// `CodeSectionStart`, that the section will not be parsed.
815    ///
816    /// The caller will be responsible for skipping `size` bytes (found in the
817    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
818    /// after the `size` bytes have been skipped.
819    ///
820    /// # Panics
821    ///
822    /// This function will panic if the parser is not in a state where it's
823    /// parsing the code section.
824    ///
825    /// # Examples
826    ///
827    /// ```
828    /// use wasmparser_nostd::{Result, Parser, Chunk, Payload::*};
829    /// use ::core::ops::Range;
830    ///
831    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
832    ///     let mut parser = Parser::new(0);
833    ///     loop {
834    ///         let payload = match parser.parse(wasm, true)? {
835    ///             Chunk::Parsed { consumed, payload } => {
836    ///                 wasm = &wasm[consumed..];
837    ///                 payload
838    ///             }
839    ///             // this state isn't possible with `eof = true`
840    ///             Chunk::NeedMoreData(_) => unreachable!(),
841    ///         };
842    ///         match payload {
843    ///             TypeSection(s) => print_range("type section", &s.range()),
844    ///             ImportSection(s) => print_range("import section", &s.range()),
845    ///             // .. other sections
846    ///
847    ///             // Print the range of the code section we see, but don't
848    ///             // actually iterate over each individual function.
849    ///             CodeSectionStart { range, size, .. } => {
850    ///                 print_range("code section", &range);
851    ///                 parser.skip_section();
852    ///                 wasm = &wasm[size as usize..];
853    ///             }
854    ///             End(_) => break,
855    ///             _ => {}
856    ///         }
857    ///     }
858    ///     Ok(())
859    /// }
860    ///
861    /// fn print_range(section: &str, range: &Range<usize>) {
862    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
863    /// }
864    /// ```
865    pub fn skip_section(&mut self) {
866        let skip = match self.state {
867            State::FunctionBody { remaining: _, len } => len,
868            _ => panic!("wrong state to call `skip_section`"),
869        };
870        self.offset += u64::from(skip);
871        self.max_size -= u64::from(skip);
872        self.state = State::SectionStart;
873    }
874}
875
/// Converts a `usize` to `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    TryInto::<u64>::try_into(a).unwrap()
}
879
880/// Parses an entire section resident in memory into a `Payload`.
881///
882/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
883/// to construct the section to return.
884fn section<'a, T>(
885    reader: &mut BinaryReader<'a>,
886    len: u32,
887    ctor: fn(&'a [u8], usize) -> Result<T>,
888    variant: fn(T) -> Payload<'a>,
889) -> Result<Payload<'a>> {
890    let offset = reader.original_position();
891    let payload = reader.read_bytes(len as usize)?;
892    // clear the hint for "need this many more bytes" here because we already
893    // read all the bytes, so it's not possible to read more bytes if this
894    // fails.
895    let reader = ctor(payload, offset).map_err(clear_hint)?;
896    Ok(variant(reader))
897}
898
899/// Reads a section that is represented by a single uleb-encoded `u32`.
900fn single_item<'a, T>(
901    reader: &mut BinaryReader<'a>,
902    len: u32,
903    desc: &str,
904) -> Result<(T, Range<usize>)>
905where
906    T: FromReader<'a>,
907{
908    let range = reader.original_position()..reader.original_position() + len as usize;
909    let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
910    // We can't recover from "unexpected eof" here because our entire section is
911    // already resident in memory, so clear the hint for how many more bytes are
912    // expected.
913    let ret = content.read().map_err(clear_hint)?;
914    if !content.eof() {
915        bail!(
916            content.original_position(),
917            "unexpected content in the {desc} section",
918        );
919    }
920    Ok((ret, range))
921}
922
923/// Attempts to parse using `f`.
924///
925/// This will update `*len` with the number of bytes consumed, and it will cause
926/// a failure to be returned instead of the number of bytes consumed exceeds
927/// what `*len` currently is.
928fn delimited<'a, T>(
929    reader: &mut BinaryReader<'a>,
930    len: &mut u32,
931    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
932) -> Result<T> {
933    let start = reader.position;
934    let ret = f(reader)?;
935    *len = match (reader.position - start)
936        .try_into()
937        .ok()
938        .and_then(|i| len.checked_sub(i))
939    {
940        Some(i) => i,
941        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
942    };
943    Ok(ret)
944}
945
946impl Default for Parser {
947    fn default() -> Parser {
948        Parser::new(0)
949    }
950}
951
952impl Payload<'_> {
953    /// If this `Payload` represents a section in the original wasm module then
954    /// the section's id and range within the original wasm binary are returned.
955    ///
956    /// Not all payloads refer to entire sections, such as the `Version` and
957    /// `CodeSectionEntry` variants. These variants will return `None` from this
958    /// function.
959    ///
960    /// Otherwise this function will return `Some` where the first element is
961    /// the byte identifier for the section and the second element is the range
962    /// of the contents of the section within the original wasm binary.
963    ///
964    /// The purpose of this method is to enable tools to easily iterate over
965    /// entire sections if necessary and handle sections uniformly, for example
966    /// dropping custom sections while preserving all other sections.
967    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
968        use Payload::*;
969
970        match self {
971            Version { .. } => None,
972            TypeSection(s) => Some((TYPE_SECTION, s.range())),
973            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
974            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
975            TableSection(s) => Some((TABLE_SECTION, s.range())),
976            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
977            TagSection(s) => Some((TAG_SECTION, s.range())),
978            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
979            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
980            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
981            DataSection(s) => Some((DATA_SECTION, s.range())),
982            StartSection { range, .. } => Some((START_SECTION, range.clone())),
983            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
984            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
985            CodeSectionEntry(_) => None,
986
987            ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
988            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
989            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
990            ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
991            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
992            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
993            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
994            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
995            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
996            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
997            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
998
999            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1000
1001            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1002
1003            End(_) => None,
1004        }
1005    }
1006}
1007
1008impl fmt::Debug for Payload<'_> {
1009    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1010        use Payload::*;
1011        match self {
1012            Version {
1013                num,
1014                encoding,
1015                range,
1016            } => f
1017                .debug_struct("Version")
1018                .field("num", num)
1019                .field("encoding", encoding)
1020                .field("range", range)
1021                .finish(),
1022
1023            // Module sections
1024            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1025            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1026            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1027            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1028            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1029            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1030            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1031            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1032            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1033            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1034            StartSection { func, range } => f
1035                .debug_struct("StartSection")
1036                .field("func", func)
1037                .field("range", range)
1038                .finish(),
1039            DataCountSection { count, range } => f
1040                .debug_struct("DataCountSection")
1041                .field("count", count)
1042                .field("range", range)
1043                .finish(),
1044            CodeSectionStart { count, range, size } => f
1045                .debug_struct("CodeSectionStart")
1046                .field("count", count)
1047                .field("range", range)
1048                .field("size", size)
1049                .finish(),
1050            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1051
1052            // Component sections
1053            ModuleSection { parser: _, range } => f
1054                .debug_struct("ModuleSection")
1055                .field("range", range)
1056                .finish(),
1057            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1058            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1059            ComponentSection { parser: _, range } => f
1060                .debug_struct("ComponentSection")
1061                .field("range", range)
1062                .finish(),
1063            ComponentInstanceSection(_) => f
1064                .debug_tuple("ComponentInstanceSection")
1065                .field(&"...")
1066                .finish(),
1067            ComponentAliasSection(_) => f
1068                .debug_tuple("ComponentAliasSection")
1069                .field(&"...")
1070                .finish(),
1071            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1072            ComponentCanonicalSection(_) => f
1073                .debug_tuple("ComponentCanonicalSection")
1074                .field(&"...")
1075                .finish(),
1076            ComponentStartSection { .. } => f
1077                .debug_tuple("ComponentStartSection")
1078                .field(&"...")
1079                .finish(),
1080            ComponentImportSection(_) => f
1081                .debug_tuple("ComponentImportSection")
1082                .field(&"...")
1083                .finish(),
1084            ComponentExportSection(_) => f
1085                .debug_tuple("ComponentExportSection")
1086                .field(&"...")
1087                .finish(),
1088
1089            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1090
1091            UnknownSection { id, range, .. } => f
1092                .debug_struct("UnknownSection")
1093                .field("id", id)
1094                .field("range", range)
1095                .finish(),
1096
1097            End(offset) => f.debug_tuple("End").field(offset).finish(),
1098        }
1099    }
1100}
1101
1102fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1103    err.inner.needed_hint = None;
1104    err
1105}
1106
1107#[cfg(test)]
1108mod tests {
1109    use super::*;
1110
    // Asserts that the expression `$a` matches the pattern `$b` (an optional
    // trailing comma is accepted). On a mismatch it panics, printing the
    // debug-formatted value together with the stringified pattern.
    macro_rules! assert_matches {
        ($a:expr, $b:pat $(,)?) => {
            match $a {
                $b => {}
                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
            }
        };
    }
1119
    // Exercises incremental parsing of the 8-byte header.
    #[test]
    fn header() {
        // With `eof` set, empty input is an error rather than a request for
        // more data.
        assert!(Parser::default().parse(&[], true).is_err());
        // Without `eof`, the parser reports how many more bytes it wants.
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        // The magic alone is not enough: four more bytes are requested.
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        // A complete header consumes all 8 bytes and yields the version.
        assert_matches!(
            Parser::default().parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }
1143
1144    #[test]
1145    fn header_iter() {
1146        for _ in Parser::default().parse_all(&[]) {}
1147        for _ in Parser::default().parse_all(b"\0") {}
1148        for _ in Parser::default().parse_all(b"\0asm") {}
1149        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1150    }
1151
1152    fn parser_after_header() -> Parser {
1153        let mut p = Parser::default();
1154        assert_matches!(
1155            p.parse(b"\0asm\x01\0\0\0", false),
1156            Ok(Chunk::Parsed {
1157                consumed: 8,
1158                payload: Payload::Version {
1159                    num: WASM_MODULE_VERSION,
1160                    encoding: Encoding::Module,
1161                    ..
1162                },
1163            }),
1164        );
1165        p
1166    }
1167
1168    fn parser_after_component_header() -> Parser {
1169        let mut p = Parser::default();
1170        assert_matches!(
1171            p.parse(b"\0asm\x0c\0\x01\0", false),
1172            Ok(Chunk::Parsed {
1173                consumed: 8,
1174                payload: Payload::Version {
1175                    num: WASM_COMPONENT_VERSION,
1176                    encoding: Encoding::Component,
1177                    ..
1178                },
1179            }),
1180        );
1181        p
1182    }
1183
    // Exercises the start section (id 8 in the inputs below), whose content
    // is a single item.
    #[test]
    fn start_section() {
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // Truncated sections are errors once `eof` is set...
        assert!(parser_after_header().parse(&[8], true).is_err());
        assert!(parser_after_header().parse(&[8, 1], true).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        // ...but only requests for more data while input may still arrive.
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );
        // id, one-byte length, then the function index.
        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );
        // A two-byte section leaves content after the single item, and an
        // empty section has no item at all: both fail even without `eof`.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }
1215
    // After the header, empty input with `eof` set ends the module: no bytes
    // are consumed and `End` carries offset 8 (just past the header).
    #[test]
    fn end_works() {
        assert_matches!(
            parser_after_header().parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }
1226
    // Exercises the type section (id 1 in the inputs below).
    #[test]
    fn type_section() {
        assert!(parser_after_header().parse(&[1], true).is_err());
        // A zero-length section is an error even without `eof`.
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        // NOTE(review): this input uses id 8, as in `start_section`; it may
        // have been meant to be `[1, 2]` -- confirm.
        assert!(parser_after_header().parse(&[8, 2], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes past the declared section length are left unconsumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }
1255
    // Exercises custom sections (id 0 in the inputs below). The expected
    // ranges start at offset 10: 8 header bytes, then the id and length bytes.
    #[test]
    fn custom_section() {
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        // One content byte: a zero-length name and no data.
        assert_matches!(
            parser_after_header().parse(&[0, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"",
                    range: Range { start: 10, end: 11 },
                }),
            }),
        );
        // Two content bytes: a one-byte name "a" and no data.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "a",
                    data_offset: 12,
                    data: b"",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
        // Two content bytes: an empty name followed by one byte of data.
        assert_matches!(
            parser_after_header().parse(&[0, 2, 0, b'a'], false),
            Ok(Chunk::Parsed {
                consumed: 4,
                payload: Payload::CustomSection(CustomSectionReader {
                    name: "",
                    data_offset: 11,
                    data: b"a",
                    range: Range { start: 10, end: 12 },
                }),
            }),
        );
    }
1302
    // Despite its name, this test exercises the code section (id 10 in the
    // inputs below), which is parsed incrementally: first a
    // `CodeSectionStart`, then one `CodeSectionEntry` per function body.
    #[test]
    fn function_section() {
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );
        // A one-byte section holding a function count of zero.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );
        // A two-byte section: a count of one, then a zero-sized body. Only the
        // section header and count are consumed by the first call.
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }
1412
    // Exercises a module nested inside a component: the parent hands out a
    // byte-limited sub-parser and resumes after the nested module's bytes.
    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        // A section id with no length byte yet is just a request for more.
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = match p.parse(&[1, 8], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }
1464
1465    #[test]
1466    fn nested_section_too_big() {
1467        let mut p = parser_after_component_header();
1468
1469        // A module that's 10 bytes in length
1470        let mut sub = match p.parse(&[1, 10], false) {
1471            Ok(Chunk::Parsed {
1472                consumed: 2,
1473                payload: Payload::ModuleSection { parser, .. },
1474            }) => parser,
1475            other => panic!("bad parse {:?}", other),
1476        };
1477
1478        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1479        // module.
1480        assert_matches!(
1481            sub.parse(b"\0asm\x01\0\0\0", false),
1482            Ok(Chunk::Parsed {
1483                consumed: 8,
1484                payload: Payload::Version { num: 1, .. },
1485            }),
1486        );
1487
1488        // We can't parse a section which declares its bigger than the outer
1489        // module. This is a custom section, one byte big, with one content byte. The
1490        // content byte, however, lives outside of the parent's module code
1491        // section.
1492        assert_eq!(
1493            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1494            "section too large",
1495        );
1496    }
1497}
wasmparser_nostd/parser.rs

wasmparser_nostd/
parser.rs