wasmparser/
parser.rs

1#[cfg(feature = "features")]
2use crate::WasmFeatures;
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::prelude::*;
5use crate::{
6    BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
7    ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
8    ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
9    TypeSectionReader,
10};
11#[cfg(feature = "component-model")]
12use crate::{
13    ComponentCanonicalSectionReader, ComponentExportSectionReader, ComponentImportSectionReader,
14    ComponentInstanceSectionReader, ComponentStartFunction, ComponentTypeSectionReader,
15    CoreTypeSectionReader, InstanceSectionReader, SectionLimited, limits::MAX_WASM_MODULE_SIZE,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number of a core WebAssembly module. It occupies header bytes 4..6
/// (little-endian), directly after the 4-byte magic number.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
/// Version number of a WebAssembly component; stored in the same header bytes
/// as [`WASM_MODULE_VERSION`].
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

/// Value of the header "kind" field (bytes 6..8, little-endian) for a core
/// module.
const KIND_MODULE: u16 = 0x00;
/// Value of the header "kind" field (bytes 6..8, little-endian) for a
/// component.
const KIND_COMPONENT: u16 = 0x01;
35
/// The supported encoding formats for the parser.
///
/// The parser determines the encoding from the version and kind fields of the
/// binary's header; until the header has been parsed a [`Parser`] assumes
/// [`Encoding::Module`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,
}
44
// Counters gathered while parsing. Every field starts out as `None`/`false`
// via `Default`.
//
// NOTE(review): the code that fills these in is outside this part of the
// file; presumably they back the `check_function_code_counts` and
// `check_data_count` checks run when the end of the input is reached --
// confirm against those methods.
#[derive(Debug, Clone, Default)]
struct ParserCounts {
    // Presumably the entry count declared by the function section, if seen.
    function_entries: Option<u32>,
    // Presumably the entry count declared by the code section, if seen.
    code_entries: Option<u32>,
    // Presumably the entry count declared by the data section, if seen.
    data_entries: Option<u32>,
    // Presumably the value carried by the data count section, if seen.
    data_count: Option<u32>,
    // Presumably set once a component start section has been encountered.
    #[cfg(feature = "component-model")]
    component_start_sections: bool,
}
54
// Section order for WebAssembly modules.
//
// Component sections are unordered and allow for duplicates,
// so this isn't used for components.
//
// The declaration order of the variants is significant: `PartialOrd`/`Ord`
// are derived, and `Parser::update_order` compares the previously seen
// variant against the new one with `>=` to detect out-of-order sections.
// Do not reorder the variants without intending to change the accepted
// section order.
#[derive(Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Debug)]
pub(crate) enum Order {
    // State before any ordered section has been seen.
    #[default]
    Initial,
    Type,
    Import,
    Function,
    Table,
    Memory,
    Tag,
    Global,
    Export,
    Start,
    Element,
    DataCount,
    Code,
    Data,
}
77
/// An incremental parser of a binary WebAssembly module or component.
///
/// This type is intended to be used to incrementally parse a WebAssembly module
/// or component as bytes become available for the module. This can also be used
/// to parse modules or components that are already entirely resident within memory.
///
/// The primary function for a parser is the [`Parser::parse`] function which
/// will incrementally consume input. You can also use the [`Parser::parse_all`]
/// function to parse a module or component that is entirely resident in memory.
#[derive(Debug, Clone)]
pub struct Parser {
    // What the parser expects next (header, section start, function body).
    state: State,
    // Logical offset in the input stream of the next byte to be parsed;
    // advanced by the number of bytes consumed on each successful parse.
    offset: u64,
    // Upper bound on the number of bytes this parser may still consume. Input
    // handed to `parse` is truncated to this size and the bound shrinks as
    // bytes are consumed. `u64::MAX` for parsers created through `new`.
    max_size: u64,
    // Encoding detected from the header; `Module` until the header is parsed.
    encoding: Encoding,
    // Feature set handed to every `BinaryReader` created by `parse`.
    #[cfg(feature = "features")]
    features: WasmFeatures,
    // Entry counts recorded while parsing (see `ParserCounts`).
    counts: ParserCounts,
    // The last ordered section that was seen together with the position
    // recorded for it by `update_order`.
    order: (Order, u64),
}
98
// What the parser expects to find next in the input.
#[derive(Debug, Clone)]
enum State {
    // The module/component header has not been parsed yet. This is the state
    // of a freshly created parser.
    Header,
    // Positioned at a section boundary: either the id and size of a new
    // section follow, or (at eof with no bytes left) the input ends here.
    SectionStart,
    // NOTE(review): handled outside the visible part of this file. Presumably
    // the parser is inside the code section with `remaining` function bodies
    // still to be yielded and `len` bytes of the section left -- confirm.
    FunctionBody { remaining: u32, len: u32 },
}
105
/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    ///
    /// This is never returned when `eof` was `true`; in that situation the
    /// underlying error is returned from [`Parser::parse`] instead.
    NeedMoreData(u64),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
    },
}
128
/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration is all possible chunks of pieces that can be parsed by a
/// [`Parser`] from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
#[non_exhaustive]
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
    },

    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing piece-by-piece of a code section.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
    },
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),

    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    #[cfg(feature = "component-model")]
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    #[cfg(feature = "component-model")]
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    #[cfg(feature = "component-model")]
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    #[cfg(feature = "component-model")]
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    #[cfg(feature = "component-model")]
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    #[cfg(feature = "component-model")]
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    #[cfg(feature = "component-model")]
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    #[cfg(feature = "component-model")]
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    #[cfg(feature = "component-model")]
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
    },
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    #[cfg(feature = "component-model")]
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    #[cfg(feature = "component-model")]
    ComponentExportSection(ComponentExportSectionReader<'a>),

    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),

    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
    },

    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),
}
358
// Section ids of core WebAssembly module sections. The parser reads one id
// byte at the start of every section and dispatches on it together with the
// current encoding.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids of WebAssembly component sections. These values overlap
// numerically with the module section ids above, so an id is only meaningful
// in combination with the encoding being parsed.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
396
397impl Parser {
398    /// Creates a new parser.
399    ///
400    /// Reports errors and ranges relative to `offset` provided, where `offset`
401    /// is some logical offset within the input stream that we're parsing.
402    pub fn new(offset: u64) -> Parser {
403        Parser {
404            state: State::Header,
405            offset,
406            max_size: u64::MAX,
407            // Assume the encoding is a module until we know otherwise
408            encoding: Encoding::Module,
409            #[cfg(feature = "features")]
410            features: WasmFeatures::all(),
411            counts: ParserCounts::default(),
412            order: (Order::default(), offset),
413        }
414    }
415
416    /// Tests whether `bytes` looks like a core WebAssembly module.
417    ///
418    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
419    /// starts with the standard core WebAssembly header.
420    pub fn is_core_wasm(bytes: &[u8]) -> bool {
421        const HEADER: [u8; 8] = [
422            WASM_MAGIC_NUMBER[0],
423            WASM_MAGIC_NUMBER[1],
424            WASM_MAGIC_NUMBER[2],
425            WASM_MAGIC_NUMBER[3],
426            WASM_MODULE_VERSION.to_le_bytes()[0],
427            WASM_MODULE_VERSION.to_le_bytes()[1],
428            KIND_MODULE.to_le_bytes()[0],
429            KIND_MODULE.to_le_bytes()[1],
430        ];
431        bytes.starts_with(&HEADER)
432    }
433
434    /// Tests whether `bytes` looks like a WebAssembly component.
435    ///
436    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
437    /// starts with the standard WebAssembly component header.
438    pub fn is_component(bytes: &[u8]) -> bool {
439        const HEADER: [u8; 8] = [
440            WASM_MAGIC_NUMBER[0],
441            WASM_MAGIC_NUMBER[1],
442            WASM_MAGIC_NUMBER[2],
443            WASM_MAGIC_NUMBER[3],
444            WASM_COMPONENT_VERSION.to_le_bytes()[0],
445            WASM_COMPONENT_VERSION.to_le_bytes()[1],
446            KIND_COMPONENT.to_le_bytes()[0],
447            KIND_COMPONENT.to_le_bytes()[1],
448        ];
449        bytes.starts_with(&HEADER)
450    }
451
    /// Returns the currently active set of wasm features that this parser is
    /// using while parsing.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// These features are handed to the [`BinaryReader`] that is created for
    /// each call to [`Parser::parse`].
    ///
    /// For more information see [`BinaryReader::new`].
    #[cfg(feature = "features")]
    pub fn features(&self) -> WasmFeatures {
        self.features
    }
462
    /// Sets the wasm features active while parsing to the `features` specified.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// The new set replaces the previous one entirely and is handed to the
    /// [`BinaryReader`] created by subsequent calls to [`Parser::parse`].
    ///
    /// For more information see [`BinaryReader::new`].
    #[cfg(feature = "features")]
    pub fn set_features(&mut self, features: WasmFeatures) {
        self.features = features;
    }
472
    /// Returns the offset, within the original input stream, that this parser
    /// is currently at.
    ///
    /// The value begins as the `offset` given to [`Parser::new`] and is
    /// advanced by [`Parser::parse`] by the number of bytes consumed each time
    /// a chunk is successfully parsed.
    pub fn offset(&self) -> u64 {
        self.offset
    }
477
478    /// Attempts to parse a chunk of data.
479    ///
480    /// This method will attempt to parse the next incremental portion of a
481    /// WebAssembly binary. Data available for the module or component is
482    /// provided as `data`, and the data can be incomplete if more data has yet
483    /// to arrive. The `eof` flag indicates whether more data will ever be received.
484    ///
485    /// There are two ways parsing can succeed with this method:
486    ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
488    ///   in `data` to parse a payload. The caller needs to wait for more data to
489    ///   be available in this situation before calling this method again. It is
490    ///   guaranteed that this is only returned if `eof` is `false`.
491    ///
492    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
493    ///   successfully parsed. The payload is available in this variant of what
    ///   was parsed, and this also indicates how many bytes of `data` were
495    ///   consumed. It's expected that the caller will not provide these bytes
496    ///   back to the [`Parser`] again.
497    ///
498    /// Note that all `Chunk` return values are connected, with a lifetime, to
499    /// the input buffer. Each parsed chunk borrows the input buffer and is a
500    /// view into it for successfully parsed chunks.
501    ///
502    /// It is expected that you'll call this method until `Payload::End` is
503    /// reached, at which point you're guaranteed that the parse has completed.
504    /// Note that complete parsing, for the top-level module or component,
505    /// implies that `data` is empty and `eof` is `true`.
506    ///
507    /// # Errors
508    ///
509    /// Parse errors are returned as an `Err`. Errors can happen when the
510    /// structure of the data is unexpected or if sections are too large for
511    /// example. Note that errors are not returned for malformed *contents* of
512    /// sections here. Sections are generally not individually parsed and each
513    /// returned [`Payload`] needs to be iterated over further to detect all
514    /// errors.
515    ///
516    /// # Examples
517    ///
518    /// An example of reading a wasm file from a stream (`std::io::Read`) and
519    /// incrementally parsing it.
520    ///
521    /// ```
522    /// use std::io::Read;
523    /// use anyhow::Result;
524    /// use wasmparser::{Parser, Chunk, Payload::*};
525    ///
526    /// fn parse(mut reader: impl Read) -> Result<()> {
527    ///     let mut buf = Vec::new();
528    ///     let mut cur = Parser::new(0);
529    ///     let mut eof = false;
530    ///     let mut stack = Vec::new();
531    ///
532    ///     loop {
533    ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
534    ///             Chunk::NeedMoreData(hint) => {
535    ///                 assert!(!eof); // otherwise an error would be returned
536    ///
537    ///                 // Use the hint to preallocate more space, then read
538    ///                 // some more data into our buffer.
539    ///                 //
540    ///                 // Note that the buffer management here is not ideal,
541    ///                 // but it's compact enough to fit in an example!
542    ///                 let len = buf.len();
543    ///                 buf.extend((0..hint).map(|_| 0u8));
544    ///                 let n = reader.read(&mut buf[len..])?;
545    ///                 buf.truncate(len + n);
546    ///                 eof = n == 0;
547    ///                 continue;
548    ///             }
549    ///
550    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
551    ///         };
552    ///
553    ///         match payload {
554    ///             // Sections for WebAssembly modules
555    ///             Version { .. } => { /* ... */ }
556    ///             TypeSection(_) => { /* ... */ }
557    ///             ImportSection(_) => { /* ... */ }
558    ///             FunctionSection(_) => { /* ... */ }
559    ///             TableSection(_) => { /* ... */ }
560    ///             MemorySection(_) => { /* ... */ }
561    ///             TagSection(_) => { /* ... */ }
562    ///             GlobalSection(_) => { /* ... */ }
563    ///             ExportSection(_) => { /* ... */ }
564    ///             StartSection { .. } => { /* ... */ }
565    ///             ElementSection(_) => { /* ... */ }
566    ///             DataCountSection { .. } => { /* ... */ }
567    ///             DataSection(_) => { /* ... */ }
568    ///
569    ///             // Here we know how many functions we'll be receiving as
570    ///             // `CodeSectionEntry`, so we can prepare for that, and
571    ///             // afterwards we can parse and handle each function
572    ///             // individually.
573    ///             CodeSectionStart { .. } => { /* ... */ }
574    ///             CodeSectionEntry(body) => {
575    ///                 // here we can iterate over `body` to parse the function
576    ///                 // and its locals
577    ///             }
578    ///
579    ///             // Sections for WebAssembly components
580    ///             InstanceSection(_) => { /* ... */ }
581    ///             CoreTypeSection(_) => { /* ... */ }
582    ///             ComponentInstanceSection(_) => { /* ... */ }
583    ///             ComponentAliasSection(_) => { /* ... */ }
584    ///             ComponentTypeSection(_) => { /* ... */ }
585    ///             ComponentCanonicalSection(_) => { /* ... */ }
586    ///             ComponentStartSection { .. } => { /* ... */ }
587    ///             ComponentImportSection(_) => { /* ... */ }
588    ///             ComponentExportSection(_) => { /* ... */ }
589    ///
590    ///             ModuleSection { parser, .. }
591    ///             | ComponentSection { parser, .. } => {
592    ///                 stack.push(cur.clone());
593    ///                 cur = parser.clone();
594    ///             }
595    ///
596    ///             CustomSection(_) => { /* ... */ }
597    ///
598    ///             // Once we've reached the end of a parser we either resume
599    ///             // at the parent parser or we break out of the loop because
600    ///             // we're done.
601    ///             End(_) => {
602    ///                 if let Some(parent_parser) = stack.pop() {
603    ///                     cur = parent_parser;
604    ///                 } else {
605    ///                     break;
606    ///                 }
607    ///             }
608    ///
609    ///             // most likely you'd return an error here
610    ///             _ => { /* ... */ }
611    ///         }
612    ///
613    ///         // once we're done processing the payload we can forget the
614    ///         // original.
615    ///         buf.drain(..consumed);
616    ///     }
617    ///
618    ///     Ok(())
619    /// }
620    ///
621    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
622    /// ```
623    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
624        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
625            (&data[..(self.max_size as usize)], true)
626        } else {
627            (data, eof)
628        };
629        // TODO: thread through `offset: u64` to `BinaryReader`, remove
630        // the cast here.
631        let starting_offset = self.offset as usize;
632        let mut reader = BinaryReader::new(data, starting_offset);
633        #[cfg(feature = "features")]
634        {
635            reader.set_features(self.features);
636        }
637        match self.parse_reader(&mut reader, eof) {
638            Ok(payload) => {
639                // Be sure to update our offset with how far we got in the
640                // reader
641                let consumed = reader.original_position() - starting_offset;
642                self.offset += usize_to_u64(consumed);
643                self.max_size -= usize_to_u64(consumed);
644                Ok(Chunk::Parsed {
645                    consumed: consumed,
646                    payload,
647                })
648            }
649            Err(e) => {
650                // If we're at EOF then there's no way we can recover from any
651                // error, so continue to propagate it.
652                if eof {
653                    return Err(e);
654                }
655
656                // If our error doesn't look like it can be resolved with more
657                // data being pulled down, then propagate it, otherwise switch
658                // the error to "feed me please"
659                match e.inner.needed_hint {
660                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
661                    None => Err(e),
662                }
663            }
664        }
665    }
666
667    fn update_order(&mut self, order: Order, pos: usize) -> Result<()> {
668        let pos_u64 = usize_to_u64(pos);
669        if self.encoding == Encoding::Module {
670            match self.order {
671                (last_order, last_pos) if last_order >= order && last_pos < pos_u64 => {
672                    bail!(pos, "section out of order")
673                }
674                _ => (),
675            }
676        }
677
678        self.order = (order, pos_u64);
679
680        Ok(())
681    }
682
683    fn parse_reader<'a>(
684        &mut self,
685        reader: &mut BinaryReader<'a>,
686        eof: bool,
687    ) -> Result<Payload<'a>> {
688        use Payload::*;
689
690        match self.state {
691            State::Header => {
692                let start = reader.original_position();
693                let header_version = reader.read_header_version()?;
694                let num = header_version as u16;
695                self.encoding = match (num, (header_version >> 16) as u16) {
696                    (WASM_MODULE_VERSION, KIND_MODULE) => Encoding::Module,
697                    (WASM_COMPONENT_VERSION, KIND_COMPONENT) => Encoding::Component,
698                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
699                };
700                self.state = State::SectionStart;
701                Ok(Version {
702                    num,
703                    encoding: self.encoding,
704                    range: start..reader.original_position(),
705                })
706            }
707            State::SectionStart => {
708                // If we're at eof and there are no bytes in our buffer, then
709                // that means we reached the end of the data since it's
710                // just a bunch of sections concatenated after the header.
711                if eof && reader.bytes_remaining() == 0 {
712                    self.check_function_code_counts(reader.original_position())?;
713                    self.check_data_count(reader.original_position())?;
714                    return Ok(Payload::End(reader.original_position()));
715                }
716
717                let id_pos = reader.original_position();
718                let id = reader.read_u8()?;
719                if id & 0x80 != 0 {
720                    return Err(BinaryReaderError::new("malformed section id", id_pos));
721                }
722                let len_pos = reader.original_position();
723                let mut len = reader.read_var_u32()?;
724
725                // Test to make sure that this section actually fits within
726                // `Parser::max_size`. This doesn't matter for top-level modules
727                // but it is required for nested modules/components to correctly ensure
728                // that all sections live entirely within their section of the
729                // file.
730                let consumed = reader.original_position() - id_pos;
731                let section_overflow = self
732                    .max_size
733                    .checked_sub(usize_to_u64(consumed))
734                    .and_then(|s| s.checked_sub(len.into()))
735                    .is_none();
736                if section_overflow {
737                    return Err(BinaryReaderError::new("section too large", len_pos));
738                }
739
740                match (self.encoding, id) {
741                    // Sections for both modules and components.
742                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
743
744                    // Module sections
745                    (Encoding::Module, TYPE_SECTION) => {
746                        self.update_order(Order::Type, reader.original_position())?;
747                        section(reader, len, TypeSectionReader::new, TypeSection)
748                    }
749                    (Encoding::Module, IMPORT_SECTION) => {
750                        self.update_order(Order::Import, reader.original_position())?;
751                        section(reader, len, ImportSectionReader::new, ImportSection)
752                    }
753                    (Encoding::Module, FUNCTION_SECTION) => {
754                        self.update_order(Order::Function, reader.original_position())?;
755                        let s = section(reader, len, FunctionSectionReader::new, FunctionSection)?;
756                        match &s {
757                            FunctionSection(f) => self.counts.function_entries = Some(f.count()),
758                            _ => unreachable!(),
759                        }
760                        Ok(s)
761                    }
762                    (Encoding::Module, TABLE_SECTION) => {
763                        self.update_order(Order::Table, reader.original_position())?;
764                        section(reader, len, TableSectionReader::new, TableSection)
765                    }
766                    (Encoding::Module, MEMORY_SECTION) => {
767                        self.update_order(Order::Memory, reader.original_position())?;
768                        section(reader, len, MemorySectionReader::new, MemorySection)
769                    }
770                    (Encoding::Module, GLOBAL_SECTION) => {
771                        self.update_order(Order::Global, reader.original_position())?;
772                        section(reader, len, GlobalSectionReader::new, GlobalSection)
773                    }
774                    (Encoding::Module, EXPORT_SECTION) => {
775                        self.update_order(Order::Export, reader.original_position())?;
776                        section(reader, len, ExportSectionReader::new, ExportSection)
777                    }
778                    (Encoding::Module, START_SECTION) => {
779                        self.update_order(Order::Start, reader.original_position())?;
780                        let (func, range) = single_item(reader, len, "start")?;
781                        Ok(StartSection { func, range })
782                    }
783                    (Encoding::Module, ELEMENT_SECTION) => {
784                        self.update_order(Order::Element, reader.original_position())?;
785                        section(reader, len, ElementSectionReader::new, ElementSection)
786                    }
787                    (Encoding::Module, CODE_SECTION) => {
788                        self.update_order(Order::Code, reader.original_position())?;
789                        let start = reader.original_position();
790                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
791                        self.counts.code_entries = Some(count);
792                        self.check_function_code_counts(start)?;
793                        let range = start..reader.original_position() + len as usize;
794                        self.state = State::FunctionBody {
795                            remaining: count,
796                            len,
797                        };
798                        Ok(CodeSectionStart {
799                            count,
800                            range,
801                            size: len,
802                        })
803                    }
804                    (Encoding::Module, DATA_SECTION) => {
805                        self.update_order(Order::Data, reader.original_position())?;
806                        let s = section(reader, len, DataSectionReader::new, DataSection)?;
807                        match &s {
808                            DataSection(d) => self.counts.data_entries = Some(d.count()),
809                            _ => unreachable!(),
810                        }
811                        self.check_data_count(reader.original_position())?;
812                        Ok(s)
813                    }
814                    (Encoding::Module, DATA_COUNT_SECTION) => {
815                        self.update_order(Order::DataCount, reader.original_position())?;
816                        let (count, range) = single_item(reader, len, "data count")?;
817                        self.counts.data_count = Some(count);
818                        Ok(DataCountSection { count, range })
819                    }
820                    (Encoding::Module, TAG_SECTION) => {
821                        self.update_order(Order::Tag, reader.original_position())?;
822                        section(reader, len, TagSectionReader::new, TagSection)
823                    }
824
825                    // Component sections
826                    #[cfg(feature = "component-model")]
827                    (Encoding::Component, COMPONENT_MODULE_SECTION)
828                    | (Encoding::Component, COMPONENT_SECTION) => {
829                        if len as usize > MAX_WASM_MODULE_SIZE {
830                            bail!(
831                                len_pos,
832                                "{} section is too large",
833                                if id == 1 { "module" } else { "component " }
834                            );
835                        }
836
837                        let range = reader.original_position()
838                            ..reader.original_position() + usize::try_from(len).unwrap();
839                        self.max_size -= u64::from(len);
840                        self.offset += u64::from(len);
841                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
842                        #[cfg(feature = "features")]
843                        {
844                            parser.features = self.features;
845                        }
846                        parser.max_size = u64::from(len);
847
848                        Ok(match id {
849                            1 => ModuleSection {
850                                parser,
851                                unchecked_range: range,
852                            },
853                            4 => ComponentSection {
854                                parser,
855                                unchecked_range: range,
856                            },
857                            _ => unreachable!(),
858                        })
859                    }
860                    #[cfg(feature = "component-model")]
861                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
862                        section(reader, len, InstanceSectionReader::new, InstanceSection)
863                    }
864                    #[cfg(feature = "component-model")]
865                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
866                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
867                    }
868                    #[cfg(feature = "component-model")]
869                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
870                        reader,
871                        len,
872                        ComponentInstanceSectionReader::new,
873                        ComponentInstanceSection,
874                    ),
875                    #[cfg(feature = "component-model")]
876                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
877                        section(reader, len, SectionLimited::new, ComponentAliasSection)
878                    }
879                    #[cfg(feature = "component-model")]
880                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
881                        reader,
882                        len,
883                        ComponentTypeSectionReader::new,
884                        ComponentTypeSection,
885                    ),
886                    #[cfg(feature = "component-model")]
887                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
888                        reader,
889                        len,
890                        ComponentCanonicalSectionReader::new,
891                        ComponentCanonicalSection,
892                    ),
893                    #[cfg(feature = "component-model")]
894                    (Encoding::Component, COMPONENT_START_SECTION) => {
895                        match self.counts.component_start_sections {
896                            false => self.counts.component_start_sections = true,
897                            true => {
898                                bail!(
899                                    reader.original_position(),
900                                    "component cannot have more than one start function"
901                                )
902                            }
903                        }
904                        let (start, range) = single_item(reader, len, "component start")?;
905                        Ok(ComponentStartSection { start, range })
906                    }
907                    #[cfg(feature = "component-model")]
908                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
909                        reader,
910                        len,
911                        ComponentImportSectionReader::new,
912                        ComponentImportSection,
913                    ),
914                    #[cfg(feature = "component-model")]
915                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
916                        reader,
917                        len,
918                        ComponentExportSectionReader::new,
919                        ComponentExportSection,
920                    ),
921                    (_, id) => {
922                        let offset = reader.original_position();
923                        let contents = reader.read_bytes(len as usize)?;
924                        let range = offset..offset + len as usize;
925                        Ok(UnknownSection {
926                            id,
927                            contents,
928                            range,
929                        })
930                    }
931                }
932            }
933
934            // Once we hit 0 remaining incrementally parsed items, with 0
935            // remaining bytes in each section, we're done and can switch back
936            // to parsing sections.
937            State::FunctionBody {
938                remaining: 0,
939                len: 0,
940            } => {
941                self.state = State::SectionStart;
942                self.parse_reader(reader, eof)
943            }
944
945            // ... otherwise trailing bytes with no remaining entries in these
946            // sections indicates an error.
947            State::FunctionBody { remaining: 0, len } => {
948                debug_assert!(len > 0);
949                let offset = reader.original_position();
950                Err(BinaryReaderError::new(
951                    "trailing bytes at end of section",
952                    offset,
953                ))
954            }
955
956            // Functions are relatively easy to parse when we know there's at
957            // least one remaining and at least one byte available to read
958            // things.
959            //
960            // We use the remaining length try to read a u32 size of the
961            // function, and using that size we require the entire function be
962            // resident in memory. This means that we're reading whole chunks of
963            // functions at a time.
964            //
965            // Limiting via `Parser::max_size` (nested parsing) happens above in
966            // `fn parse`, and limiting by our section size happens via
967            // `delimited`. Actual parsing of the function body is delegated to
968            // the caller to iterate over the `FunctionBody` structure.
969            State::FunctionBody { remaining, mut len } => {
970                let body = delimited(reader, &mut len, |r| {
971                    Ok(FunctionBody::new(r.read_reader()?))
972                })?;
973                self.state = State::FunctionBody {
974                    remaining: remaining - 1,
975                    len,
976                };
977                Ok(CodeSectionEntry(body))
978            }
979        }
980    }
981
982    /// Convenience function that can be used to parse a module or component
983    /// that is entirely resident in memory.
984    ///
985    /// This function will parse the `data` provided as a WebAssembly module
986    /// or component.
987    ///
988    /// Note that when this function yields sections that provide parsers,
989    /// no further action is required for those sections as payloads from
990    /// those parsers will be automatically returned.
991    ///
992    /// # Examples
993    ///
994    /// An example of reading a wasm file from a stream (`std::io::Read`) into
995    /// a buffer and then parsing it.
996    ///
997    /// ```
998    /// use std::io::Read;
999    /// use anyhow::Result;
1000    /// use wasmparser::{Parser, Chunk, Payload::*};
1001    ///
1002    /// fn parse(mut reader: impl Read) -> Result<()> {
1003    ///     let mut buf = Vec::new();
1004    ///     reader.read_to_end(&mut buf)?;
1005    ///     let parser = Parser::new(0);
1006    ///
1007    ///     for payload in parser.parse_all(&buf) {
1008    ///         match payload? {
1009    ///             // Sections for WebAssembly modules
1010    ///             Version { .. } => { /* ... */ }
1011    ///             TypeSection(_) => { /* ... */ }
1012    ///             ImportSection(_) => { /* ... */ }
1013    ///             FunctionSection(_) => { /* ... */ }
1014    ///             TableSection(_) => { /* ... */ }
1015    ///             MemorySection(_) => { /* ... */ }
1016    ///             TagSection(_) => { /* ... */ }
1017    ///             GlobalSection(_) => { /* ... */ }
1018    ///             ExportSection(_) => { /* ... */ }
1019    ///             StartSection { .. } => { /* ... */ }
1020    ///             ElementSection(_) => { /* ... */ }
1021    ///             DataCountSection { .. } => { /* ... */ }
1022    ///             DataSection(_) => { /* ... */ }
1023    ///
1024    ///             // Here we know how many functions we'll be receiving as
1025    ///             // `CodeSectionEntry`, so we can prepare for that, and
1026    ///             // afterwards we can parse and handle each function
1027    ///             // individually.
1028    ///             CodeSectionStart { .. } => { /* ... */ }
1029    ///             CodeSectionEntry(body) => {
1030    ///                 // here we can iterate over `body` to parse the function
1031    ///                 // and its locals
1032    ///             }
1033    ///
1034    ///             // Sections for WebAssembly components
1035    ///             ModuleSection { .. } => { /* ... */ }
1036    ///             InstanceSection(_) => { /* ... */ }
1037    ///             CoreTypeSection(_) => { /* ... */ }
1038    ///             ComponentSection { .. } => { /* ... */ }
1039    ///             ComponentInstanceSection(_) => { /* ... */ }
1040    ///             ComponentAliasSection(_) => { /* ... */ }
1041    ///             ComponentTypeSection(_) => { /* ... */ }
1042    ///             ComponentCanonicalSection(_) => { /* ... */ }
1043    ///             ComponentStartSection { .. } => { /* ... */ }
1044    ///             ComponentImportSection(_) => { /* ... */ }
1045    ///             ComponentExportSection(_) => { /* ... */ }
1046    ///
1047    ///             CustomSection(_) => { /* ... */ }
1048    ///
1049    ///             // Once we've reached the end of a parser we either resume
1050    ///             // at the parent parser or the payload iterator is at its
1051    ///             // end and we're done.
1052    ///             End(_) => {}
1053    ///
1054    ///             // most likely you'd return an error here, but if you want
1055    ///             // you can also inspect the raw contents of unknown sections
1056    ///             other => {
1057    ///                 match other.as_section() {
1058    ///                     Some((id, range)) => { /* ... */ }
1059    ///                     None => { /* ... */ }
1060    ///                 }
1061    ///             }
1062    ///         }
1063    ///     }
1064    ///
1065    ///     Ok(())
1066    /// }
1067    ///
1068    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
1069    /// ```
1070    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload<'_>>> {
1071        let mut stack = Vec::new();
1072        let mut cur = self;
1073        let mut done = false;
1074        iter::from_fn(move || {
1075            if done {
1076                return None;
1077            }
1078            let payload = match cur.parse(data, true) {
1079                // Propagate all errors
1080                Err(e) => {
1081                    done = true;
1082                    return Some(Err(e));
1083                }
1084
1085                // This isn't possible because `eof` is always true.
1086                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
1087
1088                Ok(Chunk::Parsed { payload, consumed }) => {
1089                    data = &data[consumed..];
1090                    payload
1091                }
1092            };
1093
1094            match &payload {
1095                #[cfg(feature = "component-model")]
1096                Payload::ModuleSection { parser, .. }
1097                | Payload::ComponentSection { parser, .. } => {
1098                    stack.push(cur.clone());
1099                    cur = parser.clone();
1100                }
1101                Payload::End(_) => match stack.pop() {
1102                    Some(p) => cur = p,
1103                    None => done = true,
1104                },
1105
1106                _ => {}
1107            }
1108
1109            Some(Ok(payload))
1110        })
1111    }
1112
1113    /// Skip parsing the code section entirely.
1114    ///
1115    /// This function can be used to indicate, after receiving
1116    /// `CodeSectionStart`, that the section will not be parsed.
1117    ///
1118    /// The caller will be responsible for skipping `size` bytes (found in the
1119    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1120    /// after the `size` bytes have been skipped.
1121    ///
1122    /// # Panics
1123    ///
1124    /// This function will panic if the parser is not in a state where it's
1125    /// parsing the code section.
1126    ///
1127    /// # Examples
1128    ///
1129    /// ```
1130    /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1131    /// use core::ops::Range;
1132    ///
1133    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1134    ///     let mut parser = Parser::new(0);
1135    ///     loop {
1136    ///         let payload = match parser.parse(wasm, true)? {
1137    ///             Chunk::Parsed { consumed, payload } => {
1138    ///                 wasm = &wasm[consumed..];
1139    ///                 payload
1140    ///             }
1141    ///             // this state isn't possible with `eof = true`
1142    ///             Chunk::NeedMoreData(_) => unreachable!(),
1143    ///         };
1144    ///         match payload {
1145    ///             TypeSection(s) => print_range("type section", &s.range()),
1146    ///             ImportSection(s) => print_range("import section", &s.range()),
1147    ///             // .. other sections
1148    ///
1149    ///             // Print the range of the code section we see, but don't
1150    ///             // actually iterate over each individual function.
1151    ///             CodeSectionStart { range, size, .. } => {
1152    ///                 print_range("code section", &range);
1153    ///                 parser.skip_section();
1154    ///                 wasm = &wasm[size as usize..];
1155    ///             }
1156    ///             End(_) => break,
1157    ///             _ => {}
1158    ///         }
1159    ///     }
1160    ///     Ok(())
1161    /// }
1162    ///
1163    /// fn print_range(section: &str, range: &Range<usize>) {
1164    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1165    /// }
1166    /// ```
1167    pub fn skip_section(&mut self) {
1168        let skip = match self.state {
1169            State::FunctionBody { remaining: _, len } => len,
1170            _ => panic!("wrong state to call `skip_section`"),
1171        };
1172        self.offset += u64::from(skip);
1173        self.max_size -= u64::from(skip);
1174        self.state = State::SectionStart;
1175    }
1176
1177    fn check_function_code_counts(&self, pos: usize) -> Result<()> {
1178        match (self.counts.function_entries, self.counts.code_entries) {
1179            (Some(n), Some(m)) if n != m => {
1180                bail!(pos, "function and code section have inconsistent lengths")
1181            }
1182            (Some(n), None) if n > 0 => bail!(
1183                pos,
1184                "function section has non-zero count but code section is absent"
1185            ),
1186            (None, Some(m)) if m > 0 => bail!(
1187                pos,
1188                "function section is absent but code section has non-zero count"
1189            ),
1190            _ => Ok(()),
1191        }
1192    }
1193
1194    fn check_data_count(&self, pos: usize) -> Result<()> {
1195        match (self.counts.data_count, self.counts.data_entries) {
1196            (Some(n), Some(m)) if n != m => {
1197                bail!(pos, "data count and data section have inconsistent lengths")
1198            }
1199            (Some(n), None) if n > 0 => {
1200                bail!(pos, "data count is non-zero but data section is absent")
1201            }
1202            _ => Ok(()),
1203        }
1204    }
1205}
1206
/// Converts a `usize` into a `u64`.
///
/// # Panics
///
/// Panics if the value does not fit in a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1210
1211/// Parses an entire section resident in memory into a `Payload`.
1212///
1213/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1214/// to construct the section to return.
1215fn section<'a, T>(
1216    reader: &mut BinaryReader<'a>,
1217    len: u32,
1218    ctor: fn(BinaryReader<'a>) -> Result<T>,
1219    variant: fn(T) -> Payload<'a>,
1220) -> Result<Payload<'a>> {
1221    let reader = reader.skip(|r| {
1222        r.read_bytes(len as usize)?;
1223        Ok(())
1224    })?;
1225    // clear the hint for "need this many more bytes" here because we already
1226    // read all the bytes, so it's not possible to read more bytes if this
1227    // fails.
1228    let reader = ctor(reader).map_err(clear_hint)?;
1229    Ok(variant(reader))
1230}
1231
1232/// Reads a section that is represented by a single uleb-encoded `u32`.
1233fn single_item<'a, T>(
1234    reader: &mut BinaryReader<'a>,
1235    len: u32,
1236    desc: &str,
1237) -> Result<(T, Range<usize>)>
1238where
1239    T: FromReader<'a>,
1240{
1241    let range = reader.original_position()..reader.original_position() + len as usize;
1242    let mut content = reader.skip(|r| {
1243        r.read_bytes(len as usize)?;
1244        Ok(())
1245    })?;
1246    // We can't recover from "unexpected eof" here because our entire section is
1247    // already resident in memory, so clear the hint for how many more bytes are
1248    // expected.
1249    let ret = content.read().map_err(clear_hint)?;
1250    if !content.eof() {
1251        bail!(
1252            content.original_position(),
1253            "unexpected content in the {desc} section",
1254        );
1255    }
1256    Ok((ret, range))
1257}
1258
1259/// Attempts to parse using `f`.
1260///
1261/// This will update `*len` with the number of bytes consumed, and it will cause
1262/// a failure to be returned instead of the number of bytes consumed exceeds
1263/// what `*len` currently is.
1264fn delimited<'a, T>(
1265    reader: &mut BinaryReader<'a>,
1266    len: &mut u32,
1267    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1268) -> Result<T> {
1269    let start = reader.original_position();
1270    let ret = f(reader)?;
1271    *len = match (reader.original_position() - start)
1272        .try_into()
1273        .ok()
1274        .and_then(|i| len.checked_sub(i))
1275    {
1276        Some(i) => i,
1277        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1278    };
1279    Ok(ret)
1280}
1281
1282impl Default for Parser {
1283    fn default() -> Parser {
1284        Parser::new(0)
1285    }
1286}
1287
1288impl Payload<'_> {
1289    /// If this `Payload` represents a section in the original wasm module then
1290    /// the section's id and range within the original wasm binary are returned.
1291    ///
1292    /// Not all payloads refer to entire sections, such as the `Version` and
1293    /// `CodeSectionEntry` variants. These variants will return `None` from this
1294    /// function.
1295    ///
1296    /// Otherwise this function will return `Some` where the first element is
1297    /// the byte identifier for the section and the second element is the range
1298    /// of the contents of the section within the original wasm binary.
1299    ///
1300    /// The purpose of this method is to enable tools to easily iterate over
1301    /// entire sections if necessary and handle sections uniformly, for example
1302    /// dropping custom sections while preserving all other sections.
1303    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1304        use Payload::*;
1305
1306        match self {
1307            Version { .. } => None,
1308            TypeSection(s) => Some((TYPE_SECTION, s.range())),
1309            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1310            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1311            TableSection(s) => Some((TABLE_SECTION, s.range())),
1312            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1313            TagSection(s) => Some((TAG_SECTION, s.range())),
1314            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1315            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1316            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1317            DataSection(s) => Some((DATA_SECTION, s.range())),
1318            StartSection { range, .. } => Some((START_SECTION, range.clone())),
1319            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1320            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1321            CodeSectionEntry(_) => None,
1322
1323            #[cfg(feature = "component-model")]
1324            ModuleSection {
1325                unchecked_range: range,
1326                ..
1327            } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1328            #[cfg(feature = "component-model")]
1329            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1330            #[cfg(feature = "component-model")]
1331            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1332            #[cfg(feature = "component-model")]
1333            ComponentSection {
1334                unchecked_range: range,
1335                ..
1336            } => Some((COMPONENT_SECTION, range.clone())),
1337            #[cfg(feature = "component-model")]
1338            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1339            #[cfg(feature = "component-model")]
1340            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1341            #[cfg(feature = "component-model")]
1342            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1343            #[cfg(feature = "component-model")]
1344            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1345            #[cfg(feature = "component-model")]
1346            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1347            #[cfg(feature = "component-model")]
1348            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1349            #[cfg(feature = "component-model")]
1350            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1351
1352            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1353
1354            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1355
1356            End(_) => None,
1357        }
1358    }
1359}
1360
1361impl fmt::Debug for Payload<'_> {
1362    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1363        use Payload::*;
1364        match self {
1365            Version {
1366                num,
1367                encoding,
1368                range,
1369            } => f
1370                .debug_struct("Version")
1371                .field("num", num)
1372                .field("encoding", encoding)
1373                .field("range", range)
1374                .finish(),
1375
1376            // Module sections
1377            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1378            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1379            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1380            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1381            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1382            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1383            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1384            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1385            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1386            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1387            StartSection { func, range } => f
1388                .debug_struct("StartSection")
1389                .field("func", func)
1390                .field("range", range)
1391                .finish(),
1392            DataCountSection { count, range } => f
1393                .debug_struct("DataCountSection")
1394                .field("count", count)
1395                .field("range", range)
1396                .finish(),
1397            CodeSectionStart { count, range, size } => f
1398                .debug_struct("CodeSectionStart")
1399                .field("count", count)
1400                .field("range", range)
1401                .field("size", size)
1402                .finish(),
1403            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1404
1405            // Component sections
1406            #[cfg(feature = "component-model")]
1407            ModuleSection {
1408                parser: _,
1409                unchecked_range: range,
1410            } => f
1411                .debug_struct("ModuleSection")
1412                .field("range", range)
1413                .finish(),
1414            #[cfg(feature = "component-model")]
1415            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1416            #[cfg(feature = "component-model")]
1417            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1418            #[cfg(feature = "component-model")]
1419            ComponentSection {
1420                parser: _,
1421                unchecked_range: range,
1422            } => f
1423                .debug_struct("ComponentSection")
1424                .field("range", range)
1425                .finish(),
1426            #[cfg(feature = "component-model")]
1427            ComponentInstanceSection(_) => f
1428                .debug_tuple("ComponentInstanceSection")
1429                .field(&"...")
1430                .finish(),
1431            #[cfg(feature = "component-model")]
1432            ComponentAliasSection(_) => f
1433                .debug_tuple("ComponentAliasSection")
1434                .field(&"...")
1435                .finish(),
1436            #[cfg(feature = "component-model")]
1437            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1438            #[cfg(feature = "component-model")]
1439            ComponentCanonicalSection(_) => f
1440                .debug_tuple("ComponentCanonicalSection")
1441                .field(&"...")
1442                .finish(),
1443            #[cfg(feature = "component-model")]
1444            ComponentStartSection { .. } => f
1445                .debug_tuple("ComponentStartSection")
1446                .field(&"...")
1447                .finish(),
1448            #[cfg(feature = "component-model")]
1449            ComponentImportSection(_) => f
1450                .debug_tuple("ComponentImportSection")
1451                .field(&"...")
1452                .finish(),
1453            #[cfg(feature = "component-model")]
1454            ComponentExportSection(_) => f
1455                .debug_tuple("ComponentExportSection")
1456                .field(&"...")
1457                .finish(),
1458
1459            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1460
1461            UnknownSection { id, range, .. } => f
1462                .debug_struct("UnknownSection")
1463                .field("id", id)
1464                .field("range", range)
1465                .finish(),
1466
1467            End(offset) => f.debug_tuple("End").field(offset).finish(),
1468        }
1469    }
1470}
1471
1472fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1473    err.inner.needed_hint = None;
1474    err
1475}
1476
1477#[cfg(test)]
1478mod tests {
1479    use super::*;
1480
1481    macro_rules! assert_matches {
1482        ($a:expr, $b:pat $(,)?) => {
1483            match $a {
1484                $b => {}
1485                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1486            }
1487        };
1488    }
1489
1490    #[test]
1491    fn header() {
1492        assert!(Parser::default().parse(&[], true).is_err());
1493        assert_matches!(
1494            Parser::default().parse(&[], false),
1495            Ok(Chunk::NeedMoreData(4)),
1496        );
1497        assert_matches!(
1498            Parser::default().parse(b"\0", false),
1499            Ok(Chunk::NeedMoreData(3)),
1500        );
1501        assert_matches!(
1502            Parser::default().parse(b"\0asm", false),
1503            Ok(Chunk::NeedMoreData(4)),
1504        );
1505        assert_matches!(
1506            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1507            Ok(Chunk::Parsed {
1508                consumed: 8,
1509                payload: Payload::Version { num: 1, .. },
1510            }),
1511        );
1512    }
1513
1514    #[test]
1515    fn header_iter() {
1516        for _ in Parser::default().parse_all(&[]) {}
1517        for _ in Parser::default().parse_all(b"\0") {}
1518        for _ in Parser::default().parse_all(b"\0asm") {}
1519        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1520    }
1521
1522    fn parser_after_header() -> Parser {
1523        let mut p = Parser::default();
1524        assert_matches!(
1525            p.parse(b"\0asm\x01\0\0\0", false),
1526            Ok(Chunk::Parsed {
1527                consumed: 8,
1528                payload: Payload::Version {
1529                    num: WASM_MODULE_VERSION,
1530                    encoding: Encoding::Module,
1531                    ..
1532                },
1533            }),
1534        );
1535        p
1536    }
1537
1538    fn parser_after_component_header() -> Parser {
1539        let mut p = Parser::default();
1540        assert_matches!(
1541            p.parse(b"\0asm\x0d\0\x01\0", false),
1542            Ok(Chunk::Parsed {
1543                consumed: 8,
1544                payload: Payload::Version {
1545                    num: WASM_COMPONENT_VERSION,
1546                    encoding: Encoding::Component,
1547                    ..
1548                },
1549            }),
1550        );
1551        p
1552    }
1553
1554    #[test]
1555    fn start_section() {
1556        assert_matches!(
1557            parser_after_header().parse(&[], false),
1558            Ok(Chunk::NeedMoreData(1)),
1559        );
1560        assert!(parser_after_header().parse(&[8], true).is_err());
1561        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1562        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1563        assert_matches!(
1564            parser_after_header().parse(&[8], false),
1565            Ok(Chunk::NeedMoreData(1)),
1566        );
1567        assert_matches!(
1568            parser_after_header().parse(&[8, 1], false),
1569            Ok(Chunk::NeedMoreData(1)),
1570        );
1571        assert_matches!(
1572            parser_after_header().parse(&[8, 2], false),
1573            Ok(Chunk::NeedMoreData(2)),
1574        );
1575        assert_matches!(
1576            parser_after_header().parse(&[8, 1, 1], false),
1577            Ok(Chunk::Parsed {
1578                consumed: 3,
1579                payload: Payload::StartSection { func: 1, .. },
1580            }),
1581        );
1582        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1583        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1584    }
1585
1586    #[test]
1587    fn end_works() {
1588        assert_matches!(
1589            parser_after_header().parse(&[], true),
1590            Ok(Chunk::Parsed {
1591                consumed: 0,
1592                payload: Payload::End(8),
1593            }),
1594        );
1595    }
1596
1597    #[test]
1598    fn type_section() {
1599        assert!(parser_after_header().parse(&[1], true).is_err());
1600        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1601        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1602        assert_matches!(
1603            parser_after_header().parse(&[1], false),
1604            Ok(Chunk::NeedMoreData(1)),
1605        );
1606        assert_matches!(
1607            parser_after_header().parse(&[1, 1], false),
1608            Ok(Chunk::NeedMoreData(1)),
1609        );
1610        assert_matches!(
1611            parser_after_header().parse(&[1, 1, 1], false),
1612            Ok(Chunk::Parsed {
1613                consumed: 3,
1614                payload: Payload::TypeSection(_),
1615            }),
1616        );
1617        assert_matches!(
1618            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1619            Ok(Chunk::Parsed {
1620                consumed: 3,
1621                payload: Payload::TypeSection(_),
1622            }),
1623        );
1624    }
1625
1626    #[test]
1627    fn custom_section() {
1628        assert!(parser_after_header().parse(&[0], true).is_err());
1629        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1630        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1631        assert_matches!(
1632            parser_after_header().parse(&[0, 2, 1], false),
1633            Ok(Chunk::NeedMoreData(1)),
1634        );
1635        assert_custom(
1636            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1637            3,
1638            "",
1639            11,
1640            b"",
1641            Range { start: 10, end: 11 },
1642        );
1643        assert_custom(
1644            parser_after_header()
1645                .parse(&[0, 2, 1, b'a'], false)
1646                .unwrap(),
1647            4,
1648            "a",
1649            12,
1650            b"",
1651            Range { start: 10, end: 12 },
1652        );
1653        assert_custom(
1654            parser_after_header()
1655                .parse(&[0, 2, 0, b'a'], false)
1656                .unwrap(),
1657            4,
1658            "",
1659            11,
1660            b"a",
1661            Range { start: 10, end: 12 },
1662        );
1663    }
1664
1665    fn assert_custom(
1666        chunk: Chunk<'_>,
1667        expected_consumed: usize,
1668        expected_name: &str,
1669        expected_data_offset: usize,
1670        expected_data: &[u8],
1671        expected_range: Range<usize>,
1672    ) {
1673        let (consumed, s) = match chunk {
1674            Chunk::Parsed {
1675                consumed,
1676                payload: Payload::CustomSection(s),
1677            } => (consumed, s),
1678            _ => panic!("not a custom section payload"),
1679        };
1680        assert_eq!(consumed, expected_consumed);
1681        assert_eq!(s.name(), expected_name);
1682        assert_eq!(s.data_offset(), expected_data_offset);
1683        assert_eq!(s.data(), expected_data);
1684        assert_eq!(s.range(), expected_range);
1685    }
1686
1687    #[test]
1688    fn function_section() {
1689        assert!(parser_after_header().parse(&[10], true).is_err());
1690        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1691        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1692        assert_matches!(
1693            parser_after_header().parse(&[10], false),
1694            Ok(Chunk::NeedMoreData(1))
1695        );
1696        assert_matches!(
1697            parser_after_header().parse(&[10, 1], false),
1698            Ok(Chunk::NeedMoreData(1))
1699        );
1700        let mut p = parser_after_header();
1701        assert_matches!(
1702            p.parse(&[10, 1, 0], false),
1703            Ok(Chunk::Parsed {
1704                consumed: 3,
1705                payload: Payload::CodeSectionStart { count: 0, .. },
1706            }),
1707        );
1708        assert_matches!(
1709            p.parse(&[], true),
1710            Ok(Chunk::Parsed {
1711                consumed: 0,
1712                payload: Payload::End(11),
1713            }),
1714        );
1715        let mut p = parser_after_header();
1716        assert_matches!(
1717            p.parse(&[3, 2, 1, 0], false),
1718            Ok(Chunk::Parsed {
1719                consumed: 4,
1720                payload: Payload::FunctionSection { .. },
1721            }),
1722        );
1723        assert_matches!(
1724            p.parse(&[10, 2, 1, 0], false),
1725            Ok(Chunk::Parsed {
1726                consumed: 3,
1727                payload: Payload::CodeSectionStart { count: 1, .. },
1728            }),
1729        );
1730        assert_matches!(
1731            p.parse(&[0], false),
1732            Ok(Chunk::Parsed {
1733                consumed: 1,
1734                payload: Payload::CodeSectionEntry(_),
1735            }),
1736        );
1737        assert_matches!(
1738            p.parse(&[], true),
1739            Ok(Chunk::Parsed {
1740                consumed: 0,
1741                payload: Payload::End(16),
1742            }),
1743        );
1744
1745        // 1 byte section with 1 function can't read the function body because
1746        // the section is too small
1747        let mut p = parser_after_header();
1748        assert_matches!(
1749            p.parse(&[3, 2, 1, 0], false),
1750            Ok(Chunk::Parsed {
1751                consumed: 4,
1752                payload: Payload::FunctionSection { .. },
1753            }),
1754        );
1755        assert_matches!(
1756            p.parse(&[10, 1, 1], false),
1757            Ok(Chunk::Parsed {
1758                consumed: 3,
1759                payload: Payload::CodeSectionStart { count: 1, .. },
1760            }),
1761        );
1762        assert_eq!(
1763            p.parse(&[0], false).unwrap_err().message(),
1764            "unexpected end-of-file"
1765        );
1766
1767        // section with 2 functions but section is cut off
1768        let mut p = parser_after_header();
1769        assert_matches!(
1770            p.parse(&[3, 2, 2, 0], false),
1771            Ok(Chunk::Parsed {
1772                consumed: 4,
1773                payload: Payload::FunctionSection { .. },
1774            }),
1775        );
1776        assert_matches!(
1777            p.parse(&[10, 2, 2], false),
1778            Ok(Chunk::Parsed {
1779                consumed: 3,
1780                payload: Payload::CodeSectionStart { count: 2, .. },
1781            }),
1782        );
1783        assert_matches!(
1784            p.parse(&[0], false),
1785            Ok(Chunk::Parsed {
1786                consumed: 1,
1787                payload: Payload::CodeSectionEntry(_),
1788            }),
1789        );
1790        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1791        assert_eq!(
1792            p.parse(&[0], false).unwrap_err().message(),
1793            "unexpected end-of-file",
1794        );
1795
1796        // trailing data is bad
1797        let mut p = parser_after_header();
1798        assert_matches!(
1799            p.parse(&[3, 2, 1, 0], false),
1800            Ok(Chunk::Parsed {
1801                consumed: 4,
1802                payload: Payload::FunctionSection { .. },
1803            }),
1804        );
1805        assert_matches!(
1806            p.parse(&[10, 3, 1], false),
1807            Ok(Chunk::Parsed {
1808                consumed: 3,
1809                payload: Payload::CodeSectionStart { count: 1, .. },
1810            }),
1811        );
1812        assert_matches!(
1813            p.parse(&[0], false),
1814            Ok(Chunk::Parsed {
1815                consumed: 1,
1816                payload: Payload::CodeSectionEntry(_),
1817            }),
1818        );
1819        assert_eq!(
1820            p.parse(&[0], false).unwrap_err().message(),
1821            "trailing bytes at end of section",
1822        );
1823    }
1824
1825    #[test]
1826    fn single_module() {
1827        let mut p = parser_after_component_header();
1828        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1829
1830        // A module that's 8 bytes in length
1831        let mut sub = match p.parse(&[1, 8], false) {
1832            Ok(Chunk::Parsed {
1833                consumed: 2,
1834                payload: Payload::ModuleSection { parser, .. },
1835            }) => parser,
1836            other => panic!("bad parse {other:?}"),
1837        };
1838
1839        // Parse the header of the submodule with the sub-parser.
1840        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1841        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1842        assert_matches!(
1843            sub.parse(b"\0asm\x01\0\0\0", false),
1844            Ok(Chunk::Parsed {
1845                consumed: 8,
1846                payload: Payload::Version {
1847                    num: 1,
1848                    encoding: Encoding::Module,
1849                    ..
1850                },
1851            }),
1852        );
1853
1854        // The sub-parser should be byte-limited so the next byte shouldn't get
1855        // consumed, it's intended for the parent parser.
1856        assert_matches!(
1857            sub.parse(&[10], false),
1858            Ok(Chunk::Parsed {
1859                consumed: 0,
1860                payload: Payload::End(18),
1861            }),
1862        );
1863
1864        // The parent parser should now be back to resuming, and we simulate it
1865        // being done with bytes to ensure that it's safely at the end,
1866        // completing the module code section.
1867        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1868        assert_matches!(
1869            p.parse(&[], true),
1870            Ok(Chunk::Parsed {
1871                consumed: 0,
1872                payload: Payload::End(18),
1873            }),
1874        );
1875    }
1876
1877    #[test]
1878    fn nested_section_too_big() {
1879        let mut p = parser_after_component_header();
1880
1881        // A module that's 10 bytes in length
1882        let mut sub = match p.parse(&[1, 10], false) {
1883            Ok(Chunk::Parsed {
1884                consumed: 2,
1885                payload: Payload::ModuleSection { parser, .. },
1886            }) => parser,
1887            other => panic!("bad parse {other:?}"),
1888        };
1889
1890        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1891        // module.
1892        assert_matches!(
1893            sub.parse(b"\0asm\x01\0\0\0", false),
1894            Ok(Chunk::Parsed {
1895                consumed: 8,
1896                payload: Payload::Version { num: 1, .. },
1897            }),
1898        );
1899
1900        // We can't parse a section which declares its bigger than the outer
1901        // module. This is a custom section, one byte big, with one content byte. The
1902        // content byte, however, lives outside of the parent's module code
1903        // section.
1904        assert_eq!(
1905            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1906            "section too large",
1907        );
1908    }
1909}
wasmparser/parser.rs

wasmparser/
parser.rs