wasmparser/parser.rs
1#[cfg(feature = "features")]
2use crate::WasmFeatures;
3use crate::binary_reader::WASM_MAGIC_NUMBER;
4use crate::prelude::*;
5use crate::{
6 BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,
7 ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,
8 ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,
9 TypeSectionReader,
10};
11#[cfg(feature = "component-model")]
12use crate::{
13 ComponentCanonicalSectionReader, ComponentExportSectionReader, ComponentImportSectionReader,
14 ComponentInstanceSectionReader, ComponentStartFunction, ComponentTypeSectionReader,
15 CoreTypeSectionReader, InstanceSectionReader, SectionLimited, limits::MAX_WASM_MODULE_SIZE,
16};
17use core::fmt;
18use core::iter;
19use core::ops::Range;
20
/// Version number expected in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x0001;

/// Version number expected in the header of a WebAssembly component.
///
/// Numbering started at `0xa` and is bumped on each change to the binary
/// format. Once the component model is stabilized this is expected to become
/// `0x1`. History so far:
///
/// * [????-??-??] `0xa` - original version
/// * [2023-01-05] `0xb` - `export` introduces an alias
/// * [2023-02-06] `0xc` - `export` has an optional type ascribed to it
/// * [2023-05-10] `0xd` - imports/exports drop URLs, new discriminator byte
///   which allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0x000d;

/// Header "kind" value (the 16 bits following the version) for a core module.
const KIND_MODULE: u16 = 0x0000;
/// Header "kind" value (the 16 bits following the version) for a component.
const KIND_COMPONENT: u16 = 0x0001;
35
/// The binary encodings that the parser understands.
///
/// The parser determines which one applies from the header of the input.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The input is encoded as a core WebAssembly module.
    Module,
    /// The input is encoded as a WebAssembly component.
    Component,
}
44
/// Entry counts recorded while walking the sections of a module so that
/// related sections can be cross-checked against each other.
///
/// Every counter starts out unset and is filled in when the corresponding
/// section is encountered.
#[derive(Clone, Debug, Default)]
struct ParserCounts {
    /// Number of entries declared by the function section, once seen.
    function_entries: Option<u32>,
    /// Number of bodies declared by the code section, once seen.
    code_entries: Option<u32>,
    /// Number of segments declared by the data section, once seen.
    data_entries: Option<u32>,
    /// Value carried by the data count section, once seen.
    data_count: Option<u32>,
    // NOTE(review): presumably records that a component start section was
    // encountered; its use is outside the code visible here — confirm.
    #[cfg(feature = "component-model")]
    component_start_sections: bool,
}
54
// The required ordering of sections within a WebAssembly module.
//
// Variants are declared in the order in which their sections must appear, so
// the derived `Ord` can be used directly to detect out-of-order sections. Do
// not reorder the variants. Note that this ordering is not the same as the
// numeric section ids (e.g. `Tag` sits between `Memory` and `Global`, and
// `DataCount` comes before `Code`).
//
// Components allow their sections in any order, with duplicates, so this is
// only consulted for modules.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Order {
    // No ordered section has been seen yet.
    #[default]
    Initial,
    Type,
    Import,
    Function,
    Table,
    Memory,
    Tag,
    Global,
    Export,
    Start,
    Element,
    DataCount,
    Code,
    Data,
}
77
78/// An incremental parser of a binary WebAssembly module or component.
79///
80/// This type is intended to be used to incrementally parse a WebAssembly module
81/// or component as bytes become available for the module. This can also be used
82/// to parse modules or components that are already entirely resident within memory.
83///
84/// This primary function for a parser is the [`Parser::parse`] function which
85/// will incrementally consume input. You can also use the [`Parser::parse_all`]
86/// function to parse a module or component that is entirely resident in memory.
87#[derive(Debug, Clone)]
88pub struct Parser {
89 state: State,
90 offset: u64,
91 max_size: u64,
92 encoding: Encoding,
93 #[cfg(feature = "features")]
94 features: WasmFeatures,
95 counts: ParserCounts,
96 order: (Order, u64),
97}
98
/// Internal state machine of the parser: what is expected next in the input.
#[derive(Clone, Debug)]
enum State {
    /// The module/component header has not been read yet.
    Header,
    /// Positioned at the start of a section (or at the end of the input).
    SectionStart,
    /// Inside the code section, after its entry count has been read.
    FunctionBody {
        /// Initialized to the function count declared by the code section.
        remaining: u32,
        /// Initialized to the byte size of the code section's remaining
        /// contents.
        len: u32,
    },
}
105
106/// A successful return payload from [`Parser::parse`].
107///
108/// On success one of two possible values can be returned, either that more data
109/// is needed to continue parsing or a chunk of the input was parsed, indicating
110/// how much of it was parsed.
111#[derive(Debug)]
112pub enum Chunk<'a> {
113 /// This can be returned at any time and indicates that more data is needed
114 /// to proceed with parsing. Zero bytes were consumed from the input to
115 /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
116 /// bytes are needed to continue parsing.
117 NeedMoreData(u64),
118
119 /// A chunk was successfully parsed.
120 Parsed {
121 /// This many bytes of the `data` input to [`Parser::parse`] were
122 /// consumed to produce `payload`.
123 consumed: usize,
124 /// The value that we actually parsed.
125 payload: Payload<'a>,
126 },
127}
128
129/// Values that can be parsed from a WebAssembly module or component.
130///
131/// This enumeration is all possible chunks of pieces that can be parsed by a
132/// [`Parser`] from a binary WebAssembly module or component. Note that for many
133/// sections the entire section is parsed all at once, whereas other functions,
134/// like the code section, are parsed incrementally. This is a distinction where some
135/// sections, like the type section, are required to be fully resident in memory
136/// (fully downloaded) before proceeding. Other sections, like the code section,
137/// can be processed in a streaming fashion where each function is extracted
138/// individually so it can possibly be shipped to another thread while you wait
139/// for more functions to get downloaded.
140///
141/// Note that payloads, when returned, do not indicate that the module or component
142/// is valid. For example when you receive a `Payload::TypeSection` the type
143/// section itself has not yet actually been parsed. The reader returned will be
144/// able to parse it, but you'll have to actually iterate the reader to do the
145/// full parse. Each payload returned is intended to be a *window* into the
146/// original `data` passed to [`Parser::parse`] which can be further processed
147/// if necessary.
148#[non_exhaustive]
149pub enum Payload<'a> {
150 /// Indicates the header of a WebAssembly module or component.
151 Version {
152 /// The version number found in the header.
153 num: u16,
154 /// The encoding format being parsed.
155 encoding: Encoding,
156 /// The range of bytes that were parsed to consume the header of the
157 /// module or component. Note that this range is relative to the start
158 /// of the byte stream.
159 range: Range<usize>,
160 },
161
162 /// A module type section was received and the provided reader can be
163 /// used to parse the contents of the type section.
164 TypeSection(TypeSectionReader<'a>),
165 /// A module import section was received and the provided reader can be
166 /// used to parse the contents of the import section.
167 ImportSection(ImportSectionReader<'a>),
168 /// A module function section was received and the provided reader can be
169 /// used to parse the contents of the function section.
170 FunctionSection(FunctionSectionReader<'a>),
171 /// A module table section was received and the provided reader can be
172 /// used to parse the contents of the table section.
173 TableSection(TableSectionReader<'a>),
174 /// A module memory section was received and the provided reader can be
175 /// used to parse the contents of the memory section.
176 MemorySection(MemorySectionReader<'a>),
177 /// A module tag section was received, and the provided reader can be
178 /// used to parse the contents of the tag section.
179 TagSection(TagSectionReader<'a>),
180 /// A module global section was received and the provided reader can be
181 /// used to parse the contents of the global section.
182 GlobalSection(GlobalSectionReader<'a>),
183 /// A module export section was received, and the provided reader can be
184 /// used to parse the contents of the export section.
185 ExportSection(ExportSectionReader<'a>),
186 /// A module start section was received.
187 StartSection {
188 /// The start function index
189 func: u32,
190 /// The range of bytes that specify the `func` field, specified in
191 /// offsets relative to the start of the byte stream.
192 range: Range<usize>,
193 },
194 /// A module element section was received and the provided reader can be
195 /// used to parse the contents of the element section.
196 ElementSection(ElementSectionReader<'a>),
197 /// A module data count section was received.
198 DataCountSection {
199 /// The number of data segments.
200 count: u32,
201 /// The range of bytes that specify the `count` field, specified in
202 /// offsets relative to the start of the byte stream.
203 range: Range<usize>,
204 },
205 /// A module data section was received and the provided reader can be
206 /// used to parse the contents of the data section.
207 DataSection(DataSectionReader<'a>),
208 /// Indicator of the start of the code section of a WebAssembly module.
209 ///
210 /// This entry is returned whenever the code section starts. The `count`
211 /// field indicates how many entries are in this code section. After
212 /// receiving this start marker you're guaranteed that the next `count`
213 /// items will be either `CodeSectionEntry` or an error will be returned.
214 ///
215 /// This, unlike other sections, is intended to be used for streaming the
216 /// contents of the code section. The code section is not required to be
217 /// fully resident in memory when we parse it. Instead a [`Parser`] is
218 /// capable of parsing piece-by-piece of a code section.
219 CodeSectionStart {
220 /// The number of functions in this section.
221 count: u32,
222 /// The range of bytes that represent this section, specified in
223 /// offsets relative to the start of the byte stream.
224 range: Range<usize>,
225 /// The size, in bytes, of the remaining contents of this section.
226 ///
227 /// This can be used in combination with [`Parser::skip_section`]
228 /// where the caller will know how many bytes to skip before feeding
229 /// bytes into `Parser` again.
230 size: u32,
231 },
232 /// An entry of the code section, a function, was parsed from a WebAssembly
233 /// module.
234 ///
235 /// This entry indicates that a function was successfully received from the
236 /// code section, and the payload here is the window into the original input
237 /// where the function resides. Note that the function itself has not been
238 /// parsed, it's only been outlined. You'll need to process the
239 /// `FunctionBody` provided to test whether it parses and/or is valid.
240 CodeSectionEntry(FunctionBody<'a>),
241
242 /// A core module section was received and the provided parser can be
243 /// used to parse the nested module.
244 ///
245 /// This variant is special in that it returns a sub-`Parser`. Upon
246 /// receiving a `ModuleSection` it is expected that the returned
247 /// `Parser` will be used instead of the parent `Parser` until the parse has
248 /// finished. You'll need to feed data into the `Parser` returned until it
249 /// returns `Payload::End`. After that you'll switch back to the parent
250 /// parser to resume parsing the rest of the current component.
251 ///
252 /// Note that binaries will not be parsed correctly if you feed the data for
253 /// a nested module into the parent [`Parser`].
254 #[cfg(feature = "component-model")]
255 ModuleSection {
256 /// The parser for the nested module.
257 parser: Parser,
258 /// The range of bytes that represent the nested module in the
259 /// original byte stream.
260 ///
261 /// Note that, to better support streaming parsing and validation, the
262 /// validator does *not* check that this range is in bounds.
263 unchecked_range: Range<usize>,
264 },
265 /// A core instance section was received and the provided parser can be
266 /// used to parse the contents of the core instance section.
267 ///
268 /// Currently this section is only parsed in a component.
269 #[cfg(feature = "component-model")]
270 InstanceSection(InstanceSectionReader<'a>),
271 /// A core type section was received and the provided parser can be
272 /// used to parse the contents of the core type section.
273 ///
274 /// Currently this section is only parsed in a component.
275 #[cfg(feature = "component-model")]
276 CoreTypeSection(CoreTypeSectionReader<'a>),
277 /// A component section from a WebAssembly component was received and the
278 /// provided parser can be used to parse the nested component.
279 ///
280 /// This variant is special in that it returns a sub-`Parser`. Upon
281 /// receiving a `ComponentSection` it is expected that the returned
282 /// `Parser` will be used instead of the parent `Parser` until the parse has
283 /// finished. You'll need to feed data into the `Parser` returned until it
284 /// returns `Payload::End`. After that you'll switch back to the parent
285 /// parser to resume parsing the rest of the current component.
286 ///
287 /// Note that binaries will not be parsed correctly if you feed the data for
288 /// a nested component into the parent [`Parser`].
289 #[cfg(feature = "component-model")]
290 ComponentSection {
291 /// The parser for the nested component.
292 parser: Parser,
293 /// The range of bytes that represent the nested component in the
294 /// original byte stream.
295 ///
296 /// Note that, to better support streaming parsing and validation, the
297 /// validator does *not* check that this range is in bounds.
298 unchecked_range: Range<usize>,
299 },
300 /// A component instance section was received and the provided reader can be
301 /// used to parse the contents of the component instance section.
302 #[cfg(feature = "component-model")]
303 ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
304 /// A component alias section was received and the provided reader can be
305 /// used to parse the contents of the component alias section.
306 #[cfg(feature = "component-model")]
307 ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
308 /// A component type section was received and the provided reader can be
309 /// used to parse the contents of the component type section.
310 #[cfg(feature = "component-model")]
311 ComponentTypeSection(ComponentTypeSectionReader<'a>),
312 /// A component canonical section was received and the provided reader can be
313 /// used to parse the contents of the component canonical section.
314 #[cfg(feature = "component-model")]
315 ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
316 /// A component start section was received.
317 #[cfg(feature = "component-model")]
318 ComponentStartSection {
319 /// The start function description.
320 start: ComponentStartFunction,
321 /// The range of bytes that specify the `start` field.
322 range: Range<usize>,
323 },
324 /// A component import section was received and the provided reader can be
325 /// used to parse the contents of the component import section.
326 #[cfg(feature = "component-model")]
327 ComponentImportSection(ComponentImportSectionReader<'a>),
328 /// A component export section was received, and the provided reader can be
329 /// used to parse the contents of the component export section.
330 #[cfg(feature = "component-model")]
331 ComponentExportSection(ComponentExportSectionReader<'a>),
332
333 /// A module or component custom section was received.
334 CustomSection(CustomSectionReader<'a>),
335
336 /// An unknown section was found.
337 ///
338 /// This variant is returned for all unknown sections encountered. This
339 /// likely wants to be interpreted as an error by consumers of the parser,
340 /// but this can also be used to parse sections currently unsupported by
341 /// the parser.
342 UnknownSection {
343 /// The 8-bit identifier for this section.
344 id: u8,
345 /// The contents of this section.
346 contents: &'a [u8],
347 /// The range of bytes, relative to the start of the original data
348 /// stream, that the contents of this section reside in.
349 range: Range<usize>,
350 },
351
352 /// The end of the WebAssembly module or component was reached.
353 ///
354 /// The value is the offset in the input byte stream where the end
355 /// was reached.
356 End(usize),
357}
358
// Section ids used by core WebAssembly modules. Id 0 (custom) is accepted in
// both modules and components.
const CUSTOM_SECTION: u8 = 0x00;
const TYPE_SECTION: u8 = 0x01;
const IMPORT_SECTION: u8 = 0x02;
const FUNCTION_SECTION: u8 = 0x03;
const TABLE_SECTION: u8 = 0x04;
const MEMORY_SECTION: u8 = 0x05;
const GLOBAL_SECTION: u8 = 0x06;
const EXPORT_SECTION: u8 = 0x07;
const START_SECTION: u8 = 0x08;
const ELEMENT_SECTION: u8 = 0x09;
const CODE_SECTION: u8 = 0x0a;
const DATA_SECTION: u8 = 0x0b;
const DATA_COUNT_SECTION: u8 = 0x0c;
const TAG_SECTION: u8 = 0x0d;

// Section ids used by WebAssembly components. These reuse the same numeric
// range as the module ids above, so an id must always be interpreted together
// with the encoding of the binary being parsed.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 0x01;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 0x02;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 0x03;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 0x04;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 0x05;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 0x06;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 0x07;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 0x08;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 0x09;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 0x0a;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 0x0b;
396
397impl Parser {
398 /// Creates a new parser.
399 ///
400 /// Reports errors and ranges relative to `offset` provided, where `offset`
401 /// is some logical offset within the input stream that we're parsing.
402 pub fn new(offset: u64) -> Parser {
403 Parser {
404 state: State::Header,
405 offset,
406 max_size: u64::MAX,
407 // Assume the encoding is a module until we know otherwise
408 encoding: Encoding::Module,
409 #[cfg(feature = "features")]
410 features: WasmFeatures::all(),
411 counts: ParserCounts::default(),
412 order: (Order::default(), offset),
413 }
414 }
415
416 /// Tests whether `bytes` looks like a core WebAssembly module.
417 ///
418 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
419 /// starts with the standard core WebAssembly header.
420 pub fn is_core_wasm(bytes: &[u8]) -> bool {
421 const HEADER: [u8; 8] = [
422 WASM_MAGIC_NUMBER[0],
423 WASM_MAGIC_NUMBER[1],
424 WASM_MAGIC_NUMBER[2],
425 WASM_MAGIC_NUMBER[3],
426 WASM_MODULE_VERSION.to_le_bytes()[0],
427 WASM_MODULE_VERSION.to_le_bytes()[1],
428 KIND_MODULE.to_le_bytes()[0],
429 KIND_MODULE.to_le_bytes()[1],
430 ];
431 bytes.starts_with(&HEADER)
432 }
433
434 /// Tests whether `bytes` looks like a WebAssembly component.
435 ///
436 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
437 /// starts with the standard WebAssembly component header.
438 pub fn is_component(bytes: &[u8]) -> bool {
439 const HEADER: [u8; 8] = [
440 WASM_MAGIC_NUMBER[0],
441 WASM_MAGIC_NUMBER[1],
442 WASM_MAGIC_NUMBER[2],
443 WASM_MAGIC_NUMBER[3],
444 WASM_COMPONENT_VERSION.to_le_bytes()[0],
445 WASM_COMPONENT_VERSION.to_le_bytes()[1],
446 KIND_COMPONENT.to_le_bytes()[0],
447 KIND_COMPONENT.to_le_bytes()[1],
448 ];
449 bytes.starts_with(&HEADER)
450 }
451
452 /// Returns the currently active set of wasm features that this parser is
453 /// using while parsing.
454 ///
455 /// The default set of features is [`WasmFeatures::all()`] for new parsers.
456 ///
457 /// For more information see [`BinaryReader::new`].
458 #[cfg(feature = "features")]
459 pub fn features(&self) -> WasmFeatures {
460 self.features
461 }
462
463 /// Sets the wasm features active while parsing to the `features` specified.
464 ///
465 /// The default set of features is [`WasmFeatures::all()`] for new parsers.
466 ///
467 /// For more information see [`BinaryReader::new`].
468 #[cfg(feature = "features")]
469 pub fn set_features(&mut self, features: WasmFeatures) {
470 self.features = features;
471 }
472
473 /// Returns the original offset that this parser is currently at.
474 pub fn offset(&self) -> u64 {
475 self.offset
476 }
477
478 /// Attempts to parse a chunk of data.
479 ///
480 /// This method will attempt to parse the next incremental portion of a
481 /// WebAssembly binary. Data available for the module or component is
482 /// provided as `data`, and the data can be incomplete if more data has yet
483 /// to arrive. The `eof` flag indicates whether more data will ever be received.
484 ///
485 /// There are two ways parsing can succeed with this method:
486 ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
488 /// in `data` to parse a payload. The caller needs to wait for more data to
489 /// be available in this situation before calling this method again. It is
490 /// guaranteed that this is only returned if `eof` is `false`.
491 ///
    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
    ///   successfully parsed. The payload is available in this variant of what
    ///   was parsed, and this also indicates how many bytes of `data` were
495 /// consumed. It's expected that the caller will not provide these bytes
496 /// back to the [`Parser`] again.
497 ///
498 /// Note that all `Chunk` return values are connected, with a lifetime, to
499 /// the input buffer. Each parsed chunk borrows the input buffer and is a
500 /// view into it for successfully parsed chunks.
501 ///
502 /// It is expected that you'll call this method until `Payload::End` is
503 /// reached, at which point you're guaranteed that the parse has completed.
504 /// Note that complete parsing, for the top-level module or component,
505 /// implies that `data` is empty and `eof` is `true`.
506 ///
507 /// # Errors
508 ///
509 /// Parse errors are returned as an `Err`. Errors can happen when the
510 /// structure of the data is unexpected or if sections are too large for
511 /// example. Note that errors are not returned for malformed *contents* of
512 /// sections here. Sections are generally not individually parsed and each
513 /// returned [`Payload`] needs to be iterated over further to detect all
514 /// errors.
515 ///
516 /// # Examples
517 ///
518 /// An example of reading a wasm file from a stream (`std::io::Read`) and
519 /// incrementally parsing it.
520 ///
521 /// ```
522 /// use std::io::Read;
523 /// use anyhow::Result;
524 /// use wasmparser::{Parser, Chunk, Payload::*};
525 ///
526 /// fn parse(mut reader: impl Read) -> Result<()> {
527 /// let mut buf = Vec::new();
528 /// let mut cur = Parser::new(0);
529 /// let mut eof = false;
530 /// let mut stack = Vec::new();
531 ///
532 /// loop {
533 /// let (payload, consumed) = match cur.parse(&buf, eof)? {
534 /// Chunk::NeedMoreData(hint) => {
535 /// assert!(!eof); // otherwise an error would be returned
536 ///
537 /// // Use the hint to preallocate more space, then read
538 /// // some more data into our buffer.
539 /// //
540 /// // Note that the buffer management here is not ideal,
541 /// // but it's compact enough to fit in an example!
542 /// let len = buf.len();
543 /// buf.extend((0..hint).map(|_| 0u8));
544 /// let n = reader.read(&mut buf[len..])?;
545 /// buf.truncate(len + n);
546 /// eof = n == 0;
547 /// continue;
548 /// }
549 ///
550 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
551 /// };
552 ///
553 /// match payload {
554 /// // Sections for WebAssembly modules
555 /// Version { .. } => { /* ... */ }
556 /// TypeSection(_) => { /* ... */ }
557 /// ImportSection(_) => { /* ... */ }
558 /// FunctionSection(_) => { /* ... */ }
559 /// TableSection(_) => { /* ... */ }
560 /// MemorySection(_) => { /* ... */ }
561 /// TagSection(_) => { /* ... */ }
562 /// GlobalSection(_) => { /* ... */ }
563 /// ExportSection(_) => { /* ... */ }
564 /// StartSection { .. } => { /* ... */ }
565 /// ElementSection(_) => { /* ... */ }
566 /// DataCountSection { .. } => { /* ... */ }
567 /// DataSection(_) => { /* ... */ }
568 ///
569 /// // Here we know how many functions we'll be receiving as
570 /// // `CodeSectionEntry`, so we can prepare for that, and
571 /// // afterwards we can parse and handle each function
572 /// // individually.
573 /// CodeSectionStart { .. } => { /* ... */ }
574 /// CodeSectionEntry(body) => {
575 /// // here we can iterate over `body` to parse the function
576 /// // and its locals
577 /// }
578 ///
579 /// // Sections for WebAssembly components
580 /// InstanceSection(_) => { /* ... */ }
581 /// CoreTypeSection(_) => { /* ... */ }
582 /// ComponentInstanceSection(_) => { /* ... */ }
583 /// ComponentAliasSection(_) => { /* ... */ }
584 /// ComponentTypeSection(_) => { /* ... */ }
585 /// ComponentCanonicalSection(_) => { /* ... */ }
586 /// ComponentStartSection { .. } => { /* ... */ }
587 /// ComponentImportSection(_) => { /* ... */ }
588 /// ComponentExportSection(_) => { /* ... */ }
589 ///
590 /// ModuleSection { parser, .. }
591 /// | ComponentSection { parser, .. } => {
592 /// stack.push(cur.clone());
593 /// cur = parser.clone();
594 /// }
595 ///
596 /// CustomSection(_) => { /* ... */ }
597 ///
598 /// // Once we've reached the end of a parser we either resume
599 /// // at the parent parser or we break out of the loop because
600 /// // we're done.
601 /// End(_) => {
602 /// if let Some(parent_parser) = stack.pop() {
603 /// cur = parent_parser;
604 /// } else {
605 /// break;
606 /// }
607 /// }
608 ///
609 /// // most likely you'd return an error here
610 /// _ => { /* ... */ }
611 /// }
612 ///
613 /// // once we're done processing the payload we can forget the
614 /// // original.
615 /// buf.drain(..consumed);
616 /// }
617 ///
618 /// Ok(())
619 /// }
620 ///
621 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
622 /// ```
623 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
624 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
625 (&data[..(self.max_size as usize)], true)
626 } else {
627 (data, eof)
628 };
629 // TODO: thread through `offset: u64` to `BinaryReader`, remove
630 // the cast here.
631 let starting_offset = self.offset as usize;
632 let mut reader = BinaryReader::new(data, starting_offset);
633 #[cfg(feature = "features")]
634 {
635 reader.set_features(self.features);
636 }
637 match self.parse_reader(&mut reader, eof) {
638 Ok(payload) => {
639 // Be sure to update our offset with how far we got in the
640 // reader
641 let consumed = reader.original_position() - starting_offset;
642 self.offset += usize_to_u64(consumed);
643 self.max_size -= usize_to_u64(consumed);
644 Ok(Chunk::Parsed {
645 consumed: consumed,
646 payload,
647 })
648 }
649 Err(e) => {
650 // If we're at EOF then there's no way we can recover from any
651 // error, so continue to propagate it.
652 if eof {
653 return Err(e);
654 }
655
656 // If our error doesn't look like it can be resolved with more
657 // data being pulled down, then propagate it, otherwise switch
658 // the error to "feed me please"
659 match e.inner.needed_hint {
660 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
661 None => Err(e),
662 }
663 }
664 }
665 }
666
667 fn update_order(&mut self, order: Order, pos: usize) -> Result<()> {
668 let pos_u64 = usize_to_u64(pos);
669 if self.encoding == Encoding::Module {
670 match self.order {
671 (last_order, last_pos) if last_order >= order && last_pos < pos_u64 => {
672 bail!(pos, "section out of order")
673 }
674 _ => (),
675 }
676 }
677
678 self.order = (order, pos_u64);
679
680 Ok(())
681 }
682
683 fn parse_reader<'a>(
684 &mut self,
685 reader: &mut BinaryReader<'a>,
686 eof: bool,
687 ) -> Result<Payload<'a>> {
688 use Payload::*;
689
690 match self.state {
691 State::Header => {
692 let start = reader.original_position();
693 let header_version = reader.read_header_version()?;
694 let num = header_version as u16;
695 self.encoding = match (num, (header_version >> 16) as u16) {
696 (WASM_MODULE_VERSION, KIND_MODULE) => Encoding::Module,
697 (WASM_COMPONENT_VERSION, KIND_COMPONENT) => Encoding::Component,
698 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
699 };
700 self.state = State::SectionStart;
701 Ok(Version {
702 num,
703 encoding: self.encoding,
704 range: start..reader.original_position(),
705 })
706 }
707 State::SectionStart => {
708 // If we're at eof and there are no bytes in our buffer, then
709 // that means we reached the end of the data since it's
710 // just a bunch of sections concatenated after the header.
711 if eof && reader.bytes_remaining() == 0 {
712 self.check_function_code_counts(reader.original_position())?;
713 self.check_data_count(reader.original_position())?;
714 return Ok(Payload::End(reader.original_position()));
715 }
716
717 let id_pos = reader.original_position();
718 let id = reader.read_u8()?;
719 if id & 0x80 != 0 {
720 return Err(BinaryReaderError::new("malformed section id", id_pos));
721 }
722 let len_pos = reader.original_position();
723 let mut len = reader.read_var_u32()?;
724
725 // Test to make sure that this section actually fits within
726 // `Parser::max_size`. This doesn't matter for top-level modules
727 // but it is required for nested modules/components to correctly ensure
728 // that all sections live entirely within their section of the
729 // file.
730 let consumed = reader.original_position() - id_pos;
731 let section_overflow = self
732 .max_size
733 .checked_sub(usize_to_u64(consumed))
734 .and_then(|s| s.checked_sub(len.into()))
735 .is_none();
736 if section_overflow {
737 return Err(BinaryReaderError::new("section too large", len_pos));
738 }
739
740 match (self.encoding, id) {
741 // Sections for both modules and components.
742 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
743
744 // Module sections
745 (Encoding::Module, TYPE_SECTION) => {
746 self.update_order(Order::Type, reader.original_position())?;
747 section(reader, len, TypeSectionReader::new, TypeSection)
748 }
749 (Encoding::Module, IMPORT_SECTION) => {
750 self.update_order(Order::Import, reader.original_position())?;
751 section(reader, len, ImportSectionReader::new, ImportSection)
752 }
753 (Encoding::Module, FUNCTION_SECTION) => {
754 self.update_order(Order::Function, reader.original_position())?;
755 let s = section(reader, len, FunctionSectionReader::new, FunctionSection)?;
756 match &s {
757 FunctionSection(f) => self.counts.function_entries = Some(f.count()),
758 _ => unreachable!(),
759 }
760 Ok(s)
761 }
762 (Encoding::Module, TABLE_SECTION) => {
763 self.update_order(Order::Table, reader.original_position())?;
764 section(reader, len, TableSectionReader::new, TableSection)
765 }
766 (Encoding::Module, MEMORY_SECTION) => {
767 self.update_order(Order::Memory, reader.original_position())?;
768 section(reader, len, MemorySectionReader::new, MemorySection)
769 }
770 (Encoding::Module, GLOBAL_SECTION) => {
771 self.update_order(Order::Global, reader.original_position())?;
772 section(reader, len, GlobalSectionReader::new, GlobalSection)
773 }
774 (Encoding::Module, EXPORT_SECTION) => {
775 self.update_order(Order::Export, reader.original_position())?;
776 section(reader, len, ExportSectionReader::new, ExportSection)
777 }
778 (Encoding::Module, START_SECTION) => {
779 self.update_order(Order::Start, reader.original_position())?;
780 let (func, range) = single_item(reader, len, "start")?;
781 Ok(StartSection { func, range })
782 }
783 (Encoding::Module, ELEMENT_SECTION) => {
784 self.update_order(Order::Element, reader.original_position())?;
785 section(reader, len, ElementSectionReader::new, ElementSection)
786 }
787 (Encoding::Module, CODE_SECTION) => {
788 self.update_order(Order::Code, reader.original_position())?;
789 let start = reader.original_position();
790 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
791 self.counts.code_entries = Some(count);
792 self.check_function_code_counts(start)?;
793 let range = start..reader.original_position() + len as usize;
794 self.state = State::FunctionBody {
795 remaining: count,
796 len,
797 };
798 Ok(CodeSectionStart {
799 count,
800 range,
801 size: len,
802 })
803 }
804 (Encoding::Module, DATA_SECTION) => {
805 self.update_order(Order::Data, reader.original_position())?;
806 let s = section(reader, len, DataSectionReader::new, DataSection)?;
807 match &s {
808 DataSection(d) => self.counts.data_entries = Some(d.count()),
809 _ => unreachable!(),
810 }
811 self.check_data_count(reader.original_position())?;
812 Ok(s)
813 }
814 (Encoding::Module, DATA_COUNT_SECTION) => {
815 self.update_order(Order::DataCount, reader.original_position())?;
816 let (count, range) = single_item(reader, len, "data count")?;
817 self.counts.data_count = Some(count);
818 Ok(DataCountSection { count, range })
819 }
820 (Encoding::Module, TAG_SECTION) => {
821 self.update_order(Order::Tag, reader.original_position())?;
822 section(reader, len, TagSectionReader::new, TagSection)
823 }
824
825 // Component sections
826 #[cfg(feature = "component-model")]
827 (Encoding::Component, COMPONENT_MODULE_SECTION)
828 | (Encoding::Component, COMPONENT_SECTION) => {
829 if len as usize > MAX_WASM_MODULE_SIZE {
830 bail!(
831 len_pos,
832 "{} section is too large",
833 if id == 1 { "module" } else { "component " }
834 );
835 }
836
837 let range = reader.original_position()
838 ..reader.original_position() + usize::try_from(len).unwrap();
839 self.max_size -= u64::from(len);
840 self.offset += u64::from(len);
841 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
842 #[cfg(feature = "features")]
843 {
844 parser.features = self.features;
845 }
846 parser.max_size = u64::from(len);
847
848 Ok(match id {
849 1 => ModuleSection {
850 parser,
851 unchecked_range: range,
852 },
853 4 => ComponentSection {
854 parser,
855 unchecked_range: range,
856 },
857 _ => unreachable!(),
858 })
859 }
860 #[cfg(feature = "component-model")]
861 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
862 section(reader, len, InstanceSectionReader::new, InstanceSection)
863 }
864 #[cfg(feature = "component-model")]
865 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
866 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
867 }
868 #[cfg(feature = "component-model")]
869 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
870 reader,
871 len,
872 ComponentInstanceSectionReader::new,
873 ComponentInstanceSection,
874 ),
875 #[cfg(feature = "component-model")]
876 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
877 section(reader, len, SectionLimited::new, ComponentAliasSection)
878 }
879 #[cfg(feature = "component-model")]
880 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
881 reader,
882 len,
883 ComponentTypeSectionReader::new,
884 ComponentTypeSection,
885 ),
886 #[cfg(feature = "component-model")]
887 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
888 reader,
889 len,
890 ComponentCanonicalSectionReader::new,
891 ComponentCanonicalSection,
892 ),
893 #[cfg(feature = "component-model")]
894 (Encoding::Component, COMPONENT_START_SECTION) => {
895 match self.counts.component_start_sections {
896 false => self.counts.component_start_sections = true,
897 true => {
898 bail!(
899 reader.original_position(),
900 "component cannot have more than one start function"
901 )
902 }
903 }
904 let (start, range) = single_item(reader, len, "component start")?;
905 Ok(ComponentStartSection { start, range })
906 }
907 #[cfg(feature = "component-model")]
908 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
909 reader,
910 len,
911 ComponentImportSectionReader::new,
912 ComponentImportSection,
913 ),
914 #[cfg(feature = "component-model")]
915 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
916 reader,
917 len,
918 ComponentExportSectionReader::new,
919 ComponentExportSection,
920 ),
921 (_, id) => {
922 let offset = reader.original_position();
923 let contents = reader.read_bytes(len as usize)?;
924 let range = offset..offset + len as usize;
925 Ok(UnknownSection {
926 id,
927 contents,
928 range,
929 })
930 }
931 }
932 }
933
934 // Once we hit 0 remaining incrementally parsed items, with 0
935 // remaining bytes in each section, we're done and can switch back
936 // to parsing sections.
937 State::FunctionBody {
938 remaining: 0,
939 len: 0,
940 } => {
941 self.state = State::SectionStart;
942 self.parse_reader(reader, eof)
943 }
944
945 // ... otherwise trailing bytes with no remaining entries in these
946 // sections indicates an error.
947 State::FunctionBody { remaining: 0, len } => {
948 debug_assert!(len > 0);
949 let offset = reader.original_position();
950 Err(BinaryReaderError::new(
951 "trailing bytes at end of section",
952 offset,
953 ))
954 }
955
956 // Functions are relatively easy to parse when we know there's at
957 // least one remaining and at least one byte available to read
958 // things.
959 //
            // We use the remaining length to try to read a u32 size of the
            // function, and using that size we require the entire function be
            // resident in memory. This means that we're reading whole chunks of
            // functions at a time.
964 //
965 // Limiting via `Parser::max_size` (nested parsing) happens above in
966 // `fn parse`, and limiting by our section size happens via
967 // `delimited`. Actual parsing of the function body is delegated to
968 // the caller to iterate over the `FunctionBody` structure.
969 State::FunctionBody { remaining, mut len } => {
970 let body = delimited(reader, &mut len, |r| {
971 Ok(FunctionBody::new(r.read_reader()?))
972 })?;
973 self.state = State::FunctionBody {
974 remaining: remaining - 1,
975 len,
976 };
977 Ok(CodeSectionEntry(body))
978 }
979 }
980 }
981
982 /// Convenience function that can be used to parse a module or component
983 /// that is entirely resident in memory.
984 ///
985 /// This function will parse the `data` provided as a WebAssembly module
986 /// or component.
987 ///
988 /// Note that when this function yields sections that provide parsers,
989 /// no further action is required for those sections as payloads from
990 /// those parsers will be automatically returned.
991 ///
992 /// # Examples
993 ///
994 /// An example of reading a wasm file from a stream (`std::io::Read`) into
995 /// a buffer and then parsing it.
996 ///
997 /// ```
998 /// use std::io::Read;
999 /// use anyhow::Result;
1000 /// use wasmparser::{Parser, Chunk, Payload::*};
1001 ///
1002 /// fn parse(mut reader: impl Read) -> Result<()> {
1003 /// let mut buf = Vec::new();
1004 /// reader.read_to_end(&mut buf)?;
1005 /// let parser = Parser::new(0);
1006 ///
1007 /// for payload in parser.parse_all(&buf) {
1008 /// match payload? {
1009 /// // Sections for WebAssembly modules
1010 /// Version { .. } => { /* ... */ }
1011 /// TypeSection(_) => { /* ... */ }
1012 /// ImportSection(_) => { /* ... */ }
1013 /// FunctionSection(_) => { /* ... */ }
1014 /// TableSection(_) => { /* ... */ }
1015 /// MemorySection(_) => { /* ... */ }
1016 /// TagSection(_) => { /* ... */ }
1017 /// GlobalSection(_) => { /* ... */ }
1018 /// ExportSection(_) => { /* ... */ }
1019 /// StartSection { .. } => { /* ... */ }
1020 /// ElementSection(_) => { /* ... */ }
1021 /// DataCountSection { .. } => { /* ... */ }
1022 /// DataSection(_) => { /* ... */ }
1023 ///
1024 /// // Here we know how many functions we'll be receiving as
1025 /// // `CodeSectionEntry`, so we can prepare for that, and
1026 /// // afterwards we can parse and handle each function
1027 /// // individually.
1028 /// CodeSectionStart { .. } => { /* ... */ }
1029 /// CodeSectionEntry(body) => {
1030 /// // here we can iterate over `body` to parse the function
1031 /// // and its locals
1032 /// }
1033 ///
1034 /// // Sections for WebAssembly components
1035 /// ModuleSection { .. } => { /* ... */ }
1036 /// InstanceSection(_) => { /* ... */ }
1037 /// CoreTypeSection(_) => { /* ... */ }
1038 /// ComponentSection { .. } => { /* ... */ }
1039 /// ComponentInstanceSection(_) => { /* ... */ }
1040 /// ComponentAliasSection(_) => { /* ... */ }
1041 /// ComponentTypeSection(_) => { /* ... */ }
1042 /// ComponentCanonicalSection(_) => { /* ... */ }
1043 /// ComponentStartSection { .. } => { /* ... */ }
1044 /// ComponentImportSection(_) => { /* ... */ }
1045 /// ComponentExportSection(_) => { /* ... */ }
1046 ///
1047 /// CustomSection(_) => { /* ... */ }
1048 ///
1049 /// // Once we've reached the end of a parser we either resume
1050 /// // at the parent parser or the payload iterator is at its
1051 /// // end and we're done.
1052 /// End(_) => {}
1053 ///
1054 /// // most likely you'd return an error here, but if you want
1055 /// // you can also inspect the raw contents of unknown sections
1056 /// other => {
1057 /// match other.as_section() {
1058 /// Some((id, range)) => { /* ... */ }
1059 /// None => { /* ... */ }
1060 /// }
1061 /// }
1062 /// }
1063 /// }
1064 ///
1065 /// Ok(())
1066 /// }
1067 ///
1068 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
1069 /// ```
1070 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload<'_>>> {
1071 let mut stack = Vec::new();
1072 let mut cur = self;
1073 let mut done = false;
1074 iter::from_fn(move || {
1075 if done {
1076 return None;
1077 }
1078 let payload = match cur.parse(data, true) {
1079 // Propagate all errors
1080 Err(e) => {
1081 done = true;
1082 return Some(Err(e));
1083 }
1084
1085 // This isn't possible because `eof` is always true.
1086 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
1087
1088 Ok(Chunk::Parsed { payload, consumed }) => {
1089 data = &data[consumed..];
1090 payload
1091 }
1092 };
1093
1094 match &payload {
1095 #[cfg(feature = "component-model")]
1096 Payload::ModuleSection { parser, .. }
1097 | Payload::ComponentSection { parser, .. } => {
1098 stack.push(cur.clone());
1099 cur = parser.clone();
1100 }
1101 Payload::End(_) => match stack.pop() {
1102 Some(p) => cur = p,
1103 None => done = true,
1104 },
1105
1106 _ => {}
1107 }
1108
1109 Some(Ok(payload))
1110 })
1111 }
1112
1113 /// Skip parsing the code section entirely.
1114 ///
1115 /// This function can be used to indicate, after receiving
1116 /// `CodeSectionStart`, that the section will not be parsed.
1117 ///
1118 /// The caller will be responsible for skipping `size` bytes (found in the
1119 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
1120 /// after the `size` bytes have been skipped.
1121 ///
1122 /// # Panics
1123 ///
1124 /// This function will panic if the parser is not in a state where it's
1125 /// parsing the code section.
1126 ///
1127 /// # Examples
1128 ///
1129 /// ```
1130 /// use wasmparser::{Result, Parser, Chunk, Payload::*};
1131 /// use core::ops::Range;
1132 ///
1133 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
1134 /// let mut parser = Parser::new(0);
1135 /// loop {
1136 /// let payload = match parser.parse(wasm, true)? {
1137 /// Chunk::Parsed { consumed, payload } => {
1138 /// wasm = &wasm[consumed..];
1139 /// payload
1140 /// }
1141 /// // this state isn't possible with `eof = true`
1142 /// Chunk::NeedMoreData(_) => unreachable!(),
1143 /// };
1144 /// match payload {
1145 /// TypeSection(s) => print_range("type section", &s.range()),
1146 /// ImportSection(s) => print_range("import section", &s.range()),
1147 /// // .. other sections
1148 ///
1149 /// // Print the range of the code section we see, but don't
1150 /// // actually iterate over each individual function.
1151 /// CodeSectionStart { range, size, .. } => {
1152 /// print_range("code section", &range);
1153 /// parser.skip_section();
1154 /// wasm = &wasm[size as usize..];
1155 /// }
1156 /// End(_) => break,
1157 /// _ => {}
1158 /// }
1159 /// }
1160 /// Ok(())
1161 /// }
1162 ///
1163 /// fn print_range(section: &str, range: &Range<usize>) {
1164 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1165 /// }
1166 /// ```
1167 pub fn skip_section(&mut self) {
1168 let skip = match self.state {
1169 State::FunctionBody { remaining: _, len } => len,
1170 _ => panic!("wrong state to call `skip_section`"),
1171 };
1172 self.offset += u64::from(skip);
1173 self.max_size -= u64::from(skip);
1174 self.state = State::SectionStart;
1175 }
1176
1177 fn check_function_code_counts(&self, pos: usize) -> Result<()> {
1178 match (self.counts.function_entries, self.counts.code_entries) {
1179 (Some(n), Some(m)) if n != m => {
1180 bail!(pos, "function and code section have inconsistent lengths")
1181 }
1182 (Some(n), None) if n > 0 => bail!(
1183 pos,
1184 "function section has non-zero count but code section is absent"
1185 ),
1186 (None, Some(m)) if m > 0 => bail!(
1187 pos,
1188 "function section is absent but code section has non-zero count"
1189 ),
1190 _ => Ok(()),
1191 }
1192 }
1193
1194 fn check_data_count(&self, pos: usize) -> Result<()> {
1195 match (self.counts.data_count, self.counts.data_entries) {
1196 (Some(n), Some(m)) if n != m => {
1197 bail!(pos, "data count and data section have inconsistent lengths")
1198 }
1199 (Some(n), None) if n > 0 => {
1200 bail!(pos, "data count is non-zero but data section is absent")
1201 }
1202 _ => Ok(()),
1203 }
1204 }
1205}
1206
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1210
1211/// Parses an entire section resident in memory into a `Payload`.
1212///
1213/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1214/// to construct the section to return.
1215fn section<'a, T>(
1216 reader: &mut BinaryReader<'a>,
1217 len: u32,
1218 ctor: fn(BinaryReader<'a>) -> Result<T>,
1219 variant: fn(T) -> Payload<'a>,
1220) -> Result<Payload<'a>> {
1221 let reader = reader.skip(|r| {
1222 r.read_bytes(len as usize)?;
1223 Ok(())
1224 })?;
1225 // clear the hint for "need this many more bytes" here because we already
1226 // read all the bytes, so it's not possible to read more bytes if this
1227 // fails.
1228 let reader = ctor(reader).map_err(clear_hint)?;
1229 Ok(variant(reader))
1230}
1231
1232/// Reads a section that is represented by a single uleb-encoded `u32`.
1233fn single_item<'a, T>(
1234 reader: &mut BinaryReader<'a>,
1235 len: u32,
1236 desc: &str,
1237) -> Result<(T, Range<usize>)>
1238where
1239 T: FromReader<'a>,
1240{
1241 let range = reader.original_position()..reader.original_position() + len as usize;
1242 let mut content = reader.skip(|r| {
1243 r.read_bytes(len as usize)?;
1244 Ok(())
1245 })?;
1246 // We can't recover from "unexpected eof" here because our entire section is
1247 // already resident in memory, so clear the hint for how many more bytes are
1248 // expected.
1249 let ret = content.read().map_err(clear_hint)?;
1250 if !content.eof() {
1251 bail!(
1252 content.original_position(),
1253 "unexpected content in the {desc} section",
1254 );
1255 }
1256 Ok((ret, range))
1257}
1258
1259/// Attempts to parse using `f`.
1260///
1261/// This will update `*len` with the number of bytes consumed, and it will cause
1262/// a failure to be returned instead of the number of bytes consumed exceeds
1263/// what `*len` currently is.
1264fn delimited<'a, T>(
1265 reader: &mut BinaryReader<'a>,
1266 len: &mut u32,
1267 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1268) -> Result<T> {
1269 let start = reader.original_position();
1270 let ret = f(reader)?;
1271 *len = match (reader.original_position() - start)
1272 .try_into()
1273 .ok()
1274 .and_then(|i| len.checked_sub(i))
1275 {
1276 Some(i) => i,
1277 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1278 };
1279 Ok(ret)
1280}
1281
1282impl Default for Parser {
1283 fn default() -> Parser {
1284 Parser::new(0)
1285 }
1286}
1287
1288impl Payload<'_> {
1289 /// If this `Payload` represents a section in the original wasm module then
1290 /// the section's id and range within the original wasm binary are returned.
1291 ///
1292 /// Not all payloads refer to entire sections, such as the `Version` and
1293 /// `CodeSectionEntry` variants. These variants will return `None` from this
1294 /// function.
1295 ///
1296 /// Otherwise this function will return `Some` where the first element is
1297 /// the byte identifier for the section and the second element is the range
1298 /// of the contents of the section within the original wasm binary.
1299 ///
1300 /// The purpose of this method is to enable tools to easily iterate over
1301 /// entire sections if necessary and handle sections uniformly, for example
1302 /// dropping custom sections while preserving all other sections.
1303 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1304 use Payload::*;
1305
1306 match self {
1307 Version { .. } => None,
1308 TypeSection(s) => Some((TYPE_SECTION, s.range())),
1309 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1310 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1311 TableSection(s) => Some((TABLE_SECTION, s.range())),
1312 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1313 TagSection(s) => Some((TAG_SECTION, s.range())),
1314 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1315 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1316 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1317 DataSection(s) => Some((DATA_SECTION, s.range())),
1318 StartSection { range, .. } => Some((START_SECTION, range.clone())),
1319 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1320 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1321 CodeSectionEntry(_) => None,
1322
1323 #[cfg(feature = "component-model")]
1324 ModuleSection {
1325 unchecked_range: range,
1326 ..
1327 } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1328 #[cfg(feature = "component-model")]
1329 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1330 #[cfg(feature = "component-model")]
1331 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1332 #[cfg(feature = "component-model")]
1333 ComponentSection {
1334 unchecked_range: range,
1335 ..
1336 } => Some((COMPONENT_SECTION, range.clone())),
1337 #[cfg(feature = "component-model")]
1338 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1339 #[cfg(feature = "component-model")]
1340 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1341 #[cfg(feature = "component-model")]
1342 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1343 #[cfg(feature = "component-model")]
1344 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1345 #[cfg(feature = "component-model")]
1346 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1347 #[cfg(feature = "component-model")]
1348 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1349 #[cfg(feature = "component-model")]
1350 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1351
1352 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1353
1354 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1355
1356 End(_) => None,
1357 }
1358 }
1359}
1360
1361impl fmt::Debug for Payload<'_> {
1362 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1363 use Payload::*;
1364 match self {
1365 Version {
1366 num,
1367 encoding,
1368 range,
1369 } => f
1370 .debug_struct("Version")
1371 .field("num", num)
1372 .field("encoding", encoding)
1373 .field("range", range)
1374 .finish(),
1375
1376 // Module sections
1377 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1378 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1379 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1380 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1381 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1382 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1383 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1384 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1385 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1386 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1387 StartSection { func, range } => f
1388 .debug_struct("StartSection")
1389 .field("func", func)
1390 .field("range", range)
1391 .finish(),
1392 DataCountSection { count, range } => f
1393 .debug_struct("DataCountSection")
1394 .field("count", count)
1395 .field("range", range)
1396 .finish(),
1397 CodeSectionStart { count, range, size } => f
1398 .debug_struct("CodeSectionStart")
1399 .field("count", count)
1400 .field("range", range)
1401 .field("size", size)
1402 .finish(),
1403 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1404
1405 // Component sections
1406 #[cfg(feature = "component-model")]
1407 ModuleSection {
1408 parser: _,
1409 unchecked_range: range,
1410 } => f
1411 .debug_struct("ModuleSection")
1412 .field("range", range)
1413 .finish(),
1414 #[cfg(feature = "component-model")]
1415 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1416 #[cfg(feature = "component-model")]
1417 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1418 #[cfg(feature = "component-model")]
1419 ComponentSection {
1420 parser: _,
1421 unchecked_range: range,
1422 } => f
1423 .debug_struct("ComponentSection")
1424 .field("range", range)
1425 .finish(),
1426 #[cfg(feature = "component-model")]
1427 ComponentInstanceSection(_) => f
1428 .debug_tuple("ComponentInstanceSection")
1429 .field(&"...")
1430 .finish(),
1431 #[cfg(feature = "component-model")]
1432 ComponentAliasSection(_) => f
1433 .debug_tuple("ComponentAliasSection")
1434 .field(&"...")
1435 .finish(),
1436 #[cfg(feature = "component-model")]
1437 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1438 #[cfg(feature = "component-model")]
1439 ComponentCanonicalSection(_) => f
1440 .debug_tuple("ComponentCanonicalSection")
1441 .field(&"...")
1442 .finish(),
1443 #[cfg(feature = "component-model")]
1444 ComponentStartSection { .. } => f
1445 .debug_tuple("ComponentStartSection")
1446 .field(&"...")
1447 .finish(),
1448 #[cfg(feature = "component-model")]
1449 ComponentImportSection(_) => f
1450 .debug_tuple("ComponentImportSection")
1451 .field(&"...")
1452 .finish(),
1453 #[cfg(feature = "component-model")]
1454 ComponentExportSection(_) => f
1455 .debug_tuple("ComponentExportSection")
1456 .field(&"...")
1457 .finish(),
1458
1459 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1460
1461 UnknownSection { id, range, .. } => f
1462 .debug_struct("UnknownSection")
1463 .field("id", id)
1464 .field("range", range)
1465 .finish(),
1466
1467 End(offset) => f.debug_tuple("End").field(offset).finish(),
1468 }
1469 }
1470}
1471
1472fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1473 err.inner.needed_hint = None;
1474 err
1475}
1476
1477#[cfg(test)]
1478mod tests {
1479 use super::*;
1480
1481 macro_rules! assert_matches {
1482 ($a:expr, $b:pat $(,)?) => {
1483 match $a {
1484 $b => {}
1485 a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1486 }
1487 };
1488 }
1489
1490 #[test]
1491 fn header() {
1492 assert!(Parser::default().parse(&[], true).is_err());
1493 assert_matches!(
1494 Parser::default().parse(&[], false),
1495 Ok(Chunk::NeedMoreData(4)),
1496 );
1497 assert_matches!(
1498 Parser::default().parse(b"\0", false),
1499 Ok(Chunk::NeedMoreData(3)),
1500 );
1501 assert_matches!(
1502 Parser::default().parse(b"\0asm", false),
1503 Ok(Chunk::NeedMoreData(4)),
1504 );
1505 assert_matches!(
1506 Parser::default().parse(b"\0asm\x01\0\0\0", false),
1507 Ok(Chunk::Parsed {
1508 consumed: 8,
1509 payload: Payload::Version { num: 1, .. },
1510 }),
1511 );
1512 }
1513
1514 #[test]
1515 fn header_iter() {
1516 for _ in Parser::default().parse_all(&[]) {}
1517 for _ in Parser::default().parse_all(b"\0") {}
1518 for _ in Parser::default().parse_all(b"\0asm") {}
1519 for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1520 }
1521
1522 fn parser_after_header() -> Parser {
1523 let mut p = Parser::default();
1524 assert_matches!(
1525 p.parse(b"\0asm\x01\0\0\0", false),
1526 Ok(Chunk::Parsed {
1527 consumed: 8,
1528 payload: Payload::Version {
1529 num: WASM_MODULE_VERSION,
1530 encoding: Encoding::Module,
1531 ..
1532 },
1533 }),
1534 );
1535 p
1536 }
1537
1538 fn parser_after_component_header() -> Parser {
1539 let mut p = Parser::default();
1540 assert_matches!(
1541 p.parse(b"\0asm\x0d\0\x01\0", false),
1542 Ok(Chunk::Parsed {
1543 consumed: 8,
1544 payload: Payload::Version {
1545 num: WASM_COMPONENT_VERSION,
1546 encoding: Encoding::Component,
1547 ..
1548 },
1549 }),
1550 );
1551 p
1552 }
1553
1554 #[test]
1555 fn start_section() {
1556 assert_matches!(
1557 parser_after_header().parse(&[], false),
1558 Ok(Chunk::NeedMoreData(1)),
1559 );
1560 assert!(parser_after_header().parse(&[8], true).is_err());
1561 assert!(parser_after_header().parse(&[8, 1], true).is_err());
1562 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1563 assert_matches!(
1564 parser_after_header().parse(&[8], false),
1565 Ok(Chunk::NeedMoreData(1)),
1566 );
1567 assert_matches!(
1568 parser_after_header().parse(&[8, 1], false),
1569 Ok(Chunk::NeedMoreData(1)),
1570 );
1571 assert_matches!(
1572 parser_after_header().parse(&[8, 2], false),
1573 Ok(Chunk::NeedMoreData(2)),
1574 );
1575 assert_matches!(
1576 parser_after_header().parse(&[8, 1, 1], false),
1577 Ok(Chunk::Parsed {
1578 consumed: 3,
1579 payload: Payload::StartSection { func: 1, .. },
1580 }),
1581 );
1582 assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1583 assert!(parser_after_header().parse(&[8, 0], false).is_err());
1584 }
1585
1586 #[test]
1587 fn end_works() {
1588 assert_matches!(
1589 parser_after_header().parse(&[], true),
1590 Ok(Chunk::Parsed {
1591 consumed: 0,
1592 payload: Payload::End(8),
1593 }),
1594 );
1595 }
1596
1597 #[test]
1598 fn type_section() {
1599 assert!(parser_after_header().parse(&[1], true).is_err());
1600 assert!(parser_after_header().parse(&[1, 0], false).is_err());
1601 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1602 assert_matches!(
1603 parser_after_header().parse(&[1], false),
1604 Ok(Chunk::NeedMoreData(1)),
1605 );
1606 assert_matches!(
1607 parser_after_header().parse(&[1, 1], false),
1608 Ok(Chunk::NeedMoreData(1)),
1609 );
1610 assert_matches!(
1611 parser_after_header().parse(&[1, 1, 1], false),
1612 Ok(Chunk::Parsed {
1613 consumed: 3,
1614 payload: Payload::TypeSection(_),
1615 }),
1616 );
1617 assert_matches!(
1618 parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1619 Ok(Chunk::Parsed {
1620 consumed: 3,
1621 payload: Payload::TypeSection(_),
1622 }),
1623 );
1624 }
1625
1626 #[test]
1627 fn custom_section() {
1628 assert!(parser_after_header().parse(&[0], true).is_err());
1629 assert!(parser_after_header().parse(&[0, 0], false).is_err());
1630 assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1631 assert_matches!(
1632 parser_after_header().parse(&[0, 2, 1], false),
1633 Ok(Chunk::NeedMoreData(1)),
1634 );
1635 assert_custom(
1636 parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1637 3,
1638 "",
1639 11,
1640 b"",
1641 Range { start: 10, end: 11 },
1642 );
1643 assert_custom(
1644 parser_after_header()
1645 .parse(&[0, 2, 1, b'a'], false)
1646 .unwrap(),
1647 4,
1648 "a",
1649 12,
1650 b"",
1651 Range { start: 10, end: 12 },
1652 );
1653 assert_custom(
1654 parser_after_header()
1655 .parse(&[0, 2, 0, b'a'], false)
1656 .unwrap(),
1657 4,
1658 "",
1659 11,
1660 b"a",
1661 Range { start: 10, end: 12 },
1662 );
1663 }
1664
1665 fn assert_custom(
1666 chunk: Chunk<'_>,
1667 expected_consumed: usize,
1668 expected_name: &str,
1669 expected_data_offset: usize,
1670 expected_data: &[u8],
1671 expected_range: Range<usize>,
1672 ) {
1673 let (consumed, s) = match chunk {
1674 Chunk::Parsed {
1675 consumed,
1676 payload: Payload::CustomSection(s),
1677 } => (consumed, s),
1678 _ => panic!("not a custom section payload"),
1679 };
1680 assert_eq!(consumed, expected_consumed);
1681 assert_eq!(s.name(), expected_name);
1682 assert_eq!(s.data_offset(), expected_data_offset);
1683 assert_eq!(s.data(), expected_data);
1684 assert_eq!(s.range(), expected_range);
1685 }
1686
1687 #[test]
1688 fn function_section() {
1689 assert!(parser_after_header().parse(&[10], true).is_err());
1690 assert!(parser_after_header().parse(&[10, 0], true).is_err());
1691 assert!(parser_after_header().parse(&[10, 1], true).is_err());
1692 assert_matches!(
1693 parser_after_header().parse(&[10], false),
1694 Ok(Chunk::NeedMoreData(1))
1695 );
1696 assert_matches!(
1697 parser_after_header().parse(&[10, 1], false),
1698 Ok(Chunk::NeedMoreData(1))
1699 );
1700 let mut p = parser_after_header();
1701 assert_matches!(
1702 p.parse(&[10, 1, 0], false),
1703 Ok(Chunk::Parsed {
1704 consumed: 3,
1705 payload: Payload::CodeSectionStart { count: 0, .. },
1706 }),
1707 );
1708 assert_matches!(
1709 p.parse(&[], true),
1710 Ok(Chunk::Parsed {
1711 consumed: 0,
1712 payload: Payload::End(11),
1713 }),
1714 );
1715 let mut p = parser_after_header();
1716 assert_matches!(
1717 p.parse(&[3, 2, 1, 0], false),
1718 Ok(Chunk::Parsed {
1719 consumed: 4,
1720 payload: Payload::FunctionSection { .. },
1721 }),
1722 );
1723 assert_matches!(
1724 p.parse(&[10, 2, 1, 0], false),
1725 Ok(Chunk::Parsed {
1726 consumed: 3,
1727 payload: Payload::CodeSectionStart { count: 1, .. },
1728 }),
1729 );
1730 assert_matches!(
1731 p.parse(&[0], false),
1732 Ok(Chunk::Parsed {
1733 consumed: 1,
1734 payload: Payload::CodeSectionEntry(_),
1735 }),
1736 );
1737 assert_matches!(
1738 p.parse(&[], true),
1739 Ok(Chunk::Parsed {
1740 consumed: 0,
1741 payload: Payload::End(16),
1742 }),
1743 );
1744
1745 // 1 byte section with 1 function can't read the function body because
1746 // the section is too small
1747 let mut p = parser_after_header();
1748 assert_matches!(
1749 p.parse(&[3, 2, 1, 0], false),
1750 Ok(Chunk::Parsed {
1751 consumed: 4,
1752 payload: Payload::FunctionSection { .. },
1753 }),
1754 );
1755 assert_matches!(
1756 p.parse(&[10, 1, 1], false),
1757 Ok(Chunk::Parsed {
1758 consumed: 3,
1759 payload: Payload::CodeSectionStart { count: 1, .. },
1760 }),
1761 );
1762 assert_eq!(
1763 p.parse(&[0], false).unwrap_err().message(),
1764 "unexpected end-of-file"
1765 );
1766
1767 // section with 2 functions but section is cut off
1768 let mut p = parser_after_header();
1769 assert_matches!(
1770 p.parse(&[3, 2, 2, 0], false),
1771 Ok(Chunk::Parsed {
1772 consumed: 4,
1773 payload: Payload::FunctionSection { .. },
1774 }),
1775 );
1776 assert_matches!(
1777 p.parse(&[10, 2, 2], false),
1778 Ok(Chunk::Parsed {
1779 consumed: 3,
1780 payload: Payload::CodeSectionStart { count: 2, .. },
1781 }),
1782 );
1783 assert_matches!(
1784 p.parse(&[0], false),
1785 Ok(Chunk::Parsed {
1786 consumed: 1,
1787 payload: Payload::CodeSectionEntry(_),
1788 }),
1789 );
1790 assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1791 assert_eq!(
1792 p.parse(&[0], false).unwrap_err().message(),
1793 "unexpected end-of-file",
1794 );
1795
1796 // trailing data is bad
1797 let mut p = parser_after_header();
1798 assert_matches!(
1799 p.parse(&[3, 2, 1, 0], false),
1800 Ok(Chunk::Parsed {
1801 consumed: 4,
1802 payload: Payload::FunctionSection { .. },
1803 }),
1804 );
1805 assert_matches!(
1806 p.parse(&[10, 3, 1], false),
1807 Ok(Chunk::Parsed {
1808 consumed: 3,
1809 payload: Payload::CodeSectionStart { count: 1, .. },
1810 }),
1811 );
1812 assert_matches!(
1813 p.parse(&[0], false),
1814 Ok(Chunk::Parsed {
1815 consumed: 1,
1816 payload: Payload::CodeSectionEntry(_),
1817 }),
1818 );
1819 assert_eq!(
1820 p.parse(&[0], false).unwrap_err().message(),
1821 "trailing bytes at end of section",
1822 );
1823 }
1824
#[test]
fn single_module() {
    // The 8-byte preamble of a core module: magic number followed by version 1.
    const MODULE_HEADER: &[u8] = b"\0asm\x01\0\0\0";

    let mut outer = parser_after_component_header();

    // A single byte is not enough for the outer parser to make progress yet.
    assert_matches!(outer.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

    // Announce a nested module that is 8 bytes long; the outer parser hands
    // back a dedicated sub-parser for it.
    let section = outer.parse(&[1, 8], false);
    let mut inner = match section {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {other:?}"),
    };

    // Feed the nested module's header to the sub-parser in growing prefixes
    // until all 8 bytes are available.
    assert_matches!(inner.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
    assert_matches!(
        inner.parse(&MODULE_HEADER[..4], false),
        Ok(Chunk::NeedMoreData(4)),
    );
    assert_matches!(
        inner.parse(MODULE_HEADER, false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version {
                num: 1,
                encoding: Encoding::Module,
                ..
            },
        }),
    );

    // The sub-parser is limited to the nested module's 8 bytes, so it must
    // report the end without consuming the extra byte: that byte belongs to
    // the outer parser.
    assert_matches!(
        inner.parse(&[10], false),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );

    // Control is back with the outer parser. Without input it asks for more
    // data; once told that the input has ended it finishes cleanly at the
    // same offset where the nested module stopped.
    assert_matches!(outer.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
    assert_matches!(
        outer.parse(&[], true),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );
}
1876
#[test]
fn nested_section_too_big() {
    // The 8-byte preamble of a core module: magic number followed by version 1.
    const MODULE_HEADER: &[u8] = b"\0asm\x01\0\0\0";

    let mut outer = parser_after_component_header();

    // Announce a nested module that is 10 bytes long and grab its sub-parser.
    let section = outer.parse(&[1, 10], false);
    let mut inner = match section {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {other:?}"),
    };

    // The header takes 8 of the 10 bytes, so only 2 bytes remain inside the
    // nested module.
    assert_matches!(
        inner.parse(MODULE_HEADER, false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version { num: 1, .. },
        }),
    );

    // A section may not claim more bytes than its enclosing module has left.
    // Here a custom section (id 0) declares a one-byte body: the id and size
    // use up the 2 remaining bytes, so the body byte would fall outside the
    // nested module and the parse must be rejected.
    let err = inner.parse(&[0, 1, 0], false).unwrap_err();
    assert_eq!(err.message(), "section too large");
}
1909}