wasmparser_nostd/parser.rs
1use crate::CoreTypeSectionReader;
2use crate::{
3 limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
4 ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
5 ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
6 ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
7 GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
8 SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
9};
10use ::alloc::vec::Vec;
11use ::core::convert::TryInto;
12use ::core::fmt;
13use ::core::iter;
14use ::core::ops::Range;
15
// Version number associated with core WebAssembly modules.
// NOTE(review): presumably compared against the low 16 bits of the header
// version by the validator — its use is outside this part of the file.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Version number associated with WebAssembly components.
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2022-01-05] 0xb - `export` introduces an alias
// * [2022-02-06] 0xc - `export` has an optional type ascribed to it
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xc;
25
/// The supported encoding formats for the parser.
///
/// The parser picks the encoding while reading the binary header: the upper
/// 16 bits of the header's version word select the kind (`0x00` for a module,
/// `0x01` for a component).
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,
}
34
/// An incremental parser of a binary WebAssembly module or component.
///
/// This type is intended to be used to incrementally parse a WebAssembly module
/// or component as bytes become available for the module. This can also be used
/// to parse modules or components that are already entirely resident within memory.
///
/// The primary function for a parser is the [`Parser::parse`] function which
/// will incrementally consume input. You can also use the [`Parser::parse_all`]
/// function to parse a module or component that is entirely resident in memory.
#[derive(Debug, Clone)]
pub struct Parser {
    /// What the parser expects to read next (header, section, or function body).
    state: State,
    /// Logical offset, within the whole input stream, of the next byte that
    /// will be handed to [`Parser::parse`]. Advanced as bytes are consumed.
    offset: u64,
    /// Upper bound on the number of bytes this parser may still consume.
    /// Starts at `u64::MAX` for parsers made with [`Parser::new`]; nested
    /// parsers are limited to the length of their enclosing section.
    max_size: u64,
    /// Encoding detected from the header; assumed to be a module until the
    /// header has been read.
    encoding: Encoding,
}
51
/// Internal state machine of a [`Parser`], describing what is parsed next.
#[derive(Debug, Clone)]
enum State {
    /// Nothing has been parsed yet; the binary header is expected next.
    Header,
    /// The header has been parsed; a section id and length (or the end of the
    /// input) is expected next.
    SectionStart,
    /// The parser is inside a code section that is being streamed one
    /// function body at a time.
    ///
    /// `remaining` is the number of function bodies still to be yielded and
    /// `len` is the number of bytes of the section not yet consumed.
    FunctionBody { remaining: u32, len: u32 },
}
58
/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    NeedMoreData(u64),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
    },
}
81
/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration is all possible chunks of pieces that can be parsed by a
/// [`Parser`] from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
    },

    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing piece-by-piece of a code section.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
    },
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),

    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        range: Range<usize>,
    },
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        range: Range<usize>,
    },
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
    },
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    ComponentExportSection(ComponentExportSectionReader<'a>),

    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),

    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
    },

    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),
}
293
// Section id shared by both encodings: custom sections use id 0 in modules
// and in components alike.
const CUSTOM_SECTION: u8 = 0;

// Section ids that are recognized when the encoding is `Encoding::Module`.
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids that are recognized when the encoding is `Encoding::Component`.
// The numeric values overlap with the module ids above, so an id is only
// meaningful together with the encoding it was read under.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
320
321impl Parser {
322 /// Creates a new parser.
323 ///
324 /// Reports errors and ranges relative to `offset` provided, where `offset`
325 /// is some logical offset within the input stream that we're parsing.
326 pub fn new(offset: u64) -> Parser {
327 Parser {
328 state: State::Header,
329 offset,
330 max_size: u64::MAX,
331 // Assume the encoding is a module until we know otherwise
332 encoding: Encoding::Module,
333 }
334 }
335
336 /// Attempts to parse a chunk of data.
337 ///
338 /// This method will attempt to parse the next incremental portion of a
339 /// WebAssembly binary. Data available for the module or component is
340 /// provided as `data`, and the data can be incomplete if more data has yet
341 /// to arrive. The `eof` flag indicates whether more data will ever be received.
342 ///
343 /// There are two ways parsing can succeed with this method:
344 ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
346 /// in `data` to parse a payload. The caller needs to wait for more data to
347 /// be available in this situation before calling this method again. It is
348 /// guaranteed that this is only returned if `eof` is `false`.
349 ///
350 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
351 /// successfully parsed. The payload is available in this variant of what
    ///   was parsed, and this also indicates how many bytes of `data` were
353 /// consumed. It's expected that the caller will not provide these bytes
354 /// back to the [`Parser`] again.
355 ///
356 /// Note that all `Chunk` return values are connected, with a lifetime, to
357 /// the input buffer. Each parsed chunk borrows the input buffer and is a
358 /// view into it for successfully parsed chunks.
359 ///
360 /// It is expected that you'll call this method until `Payload::End` is
361 /// reached, at which point you're guaranteed that the parse has completed.
362 /// Note that complete parsing, for the top-level module or component,
363 /// implies that `data` is empty and `eof` is `true`.
364 ///
365 /// # Errors
366 ///
367 /// Parse errors are returned as an `Err`. Errors can happen when the
368 /// structure of the data is unexpected or if sections are too large for
369 /// example. Note that errors are not returned for malformed *contents* of
370 /// sections here. Sections are generally not individually parsed and each
371 /// returned [`Payload`] needs to be iterated over further to detect all
372 /// errors.
373 ///
374 /// # Examples
375 ///
376 /// An example of reading a wasm file from a stream (`std::io::Read`) and
377 /// incrementally parsing it.
378 ///
379 /// ```
380 /// use std::io::Read;
381 /// use anyhow::Result;
382 /// use wasmparser_nostd::{Parser, Chunk, Payload::*};
383 ///
384 /// fn parse(mut reader: impl Read) -> Result<()> {
385 /// let mut buf = Vec::new();
386 /// let mut parser = Parser::new(0);
387 /// let mut eof = false;
388 /// let mut stack = Vec::new();
389 ///
390 /// loop {
391 /// let (payload, consumed) = match parser.parse(&buf, eof)? {
392 /// Chunk::NeedMoreData(hint) => {
393 /// assert!(!eof); // otherwise an error would be returned
394 ///
395 /// // Use the hint to preallocate more space, then read
396 /// // some more data into our buffer.
397 /// //
398 /// // Note that the buffer management here is not ideal,
399 /// // but it's compact enough to fit in an example!
400 /// let len = buf.len();
401 /// buf.extend((0..hint).map(|_| 0u8));
402 /// let n = reader.read(&mut buf[len..])?;
403 /// buf.truncate(len + n);
404 /// eof = n == 0;
405 /// continue;
406 /// }
407 ///
408 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
409 /// };
410 ///
411 /// match payload {
412 /// // Sections for WebAssembly modules
413 /// Version { .. } => { /* ... */ }
414 /// TypeSection(_) => { /* ... */ }
415 /// ImportSection(_) => { /* ... */ }
416 /// FunctionSection(_) => { /* ... */ }
417 /// TableSection(_) => { /* ... */ }
418 /// MemorySection(_) => { /* ... */ }
419 /// TagSection(_) => { /* ... */ }
420 /// GlobalSection(_) => { /* ... */ }
421 /// ExportSection(_) => { /* ... */ }
422 /// StartSection { .. } => { /* ... */ }
423 /// ElementSection(_) => { /* ... */ }
424 /// DataCountSection { .. } => { /* ... */ }
425 /// DataSection(_) => { /* ... */ }
426 ///
427 /// // Here we know how many functions we'll be receiving as
428 /// // `CodeSectionEntry`, so we can prepare for that, and
429 /// // afterwards we can parse and handle each function
430 /// // individually.
431 /// CodeSectionStart { .. } => { /* ... */ }
432 /// CodeSectionEntry(body) => {
433 /// // here we can iterate over `body` to parse the function
434 /// // and its locals
435 /// }
436 ///
437 /// // Sections for WebAssembly components
438 /// ModuleSection { .. } => { /* ... */ }
439 /// InstanceSection(_) => { /* ... */ }
440 /// CoreTypeSection(_) => { /* ... */ }
441 /// ComponentSection { .. } => { /* ... */ }
442 /// ComponentInstanceSection(_) => { /* ... */ }
443 /// ComponentAliasSection(_) => { /* ... */ }
444 /// ComponentTypeSection(_) => { /* ... */ }
445 /// ComponentCanonicalSection(_) => { /* ... */ }
446 /// ComponentStartSection { .. } => { /* ... */ }
447 /// ComponentImportSection(_) => { /* ... */ }
448 /// ComponentExportSection(_) => { /* ... */ }
449 ///
450 /// CustomSection(_) => { /* ... */ }
451 ///
452 /// // most likely you'd return an error here
453 /// UnknownSection { id, .. } => { /* ... */ }
454 ///
455 /// // Once we've reached the end of a parser we either resume
456 /// // at the parent parser or we break out of the loop because
457 /// // we're done.
458 /// End(_) => {
459 /// if let Some(parent_parser) = stack.pop() {
460 /// parser = parent_parser;
461 /// } else {
462 /// break;
463 /// }
464 /// }
465 /// }
466 ///
467 /// // once we're done processing the payload we can forget the
468 /// // original.
469 /// buf.drain(..consumed);
470 /// }
471 ///
472 /// Ok(())
473 /// }
474 ///
475 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
476 /// ```
477 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
478 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
479 (&data[..(self.max_size as usize)], true)
480 } else {
481 (data, eof)
482 };
483 // TODO: thread through `offset: u64` to `BinaryReader`, remove
484 // the cast here.
485 let mut reader = BinaryReader::new_with_offset(data, self.offset as usize);
486 match self.parse_reader(&mut reader, eof) {
487 Ok(payload) => {
488 // Be sure to update our offset with how far we got in the
489 // reader
490 self.offset += usize_to_u64(reader.position);
491 self.max_size -= usize_to_u64(reader.position);
492 Ok(Chunk::Parsed {
493 consumed: reader.position,
494 payload,
495 })
496 }
497 Err(e) => {
498 // If we're at EOF then there's no way we can recover from any
499 // error, so continue to propagate it.
500 if eof {
501 return Err(e);
502 }
503
504 // If our error doesn't look like it can be resolved with more
505 // data being pulled down, then propagate it, otherwise switch
506 // the error to "feed me please"
507 match e.inner.needed_hint {
508 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
509 None => Err(e),
510 }
511 }
512 }
513 }
514
515 fn parse_reader<'a>(
516 &mut self,
517 reader: &mut BinaryReader<'a>,
518 eof: bool,
519 ) -> Result<Payload<'a>> {
520 use Payload::*;
521
522 match self.state {
523 State::Header => {
524 const KIND_MODULE: u16 = 0x00;
525 const KIND_COMPONENT: u16 = 0x01;
526
527 let start = reader.original_position();
528 let header_version = reader.read_header_version()?;
529 self.encoding = match (header_version >> 16) as u16 {
530 KIND_MODULE => Encoding::Module,
531 KIND_COMPONENT => Encoding::Component,
532 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
533 };
534 let num = header_version as u16;
535 self.state = State::SectionStart;
536 Ok(Version {
537 num,
538 encoding: self.encoding,
539 range: start..reader.original_position(),
540 })
541 }
542 State::SectionStart => {
543 // If we're at eof and there are no bytes in our buffer, then
544 // that means we reached the end of the data since it's
545 // just a bunch of sections concatenated after the header.
546 if eof && reader.bytes_remaining() == 0 {
547 return Ok(Payload::End(reader.original_position()));
548 }
549
550 let id_pos = reader.position;
551 let id = reader.read_u8()?;
552 if id & 0x80 != 0 {
553 return Err(BinaryReaderError::new("malformed section id", id_pos));
554 }
555 let len_pos = reader.original_position();
556 let mut len = reader.read_var_u32()?;
557
558 // Test to make sure that this section actually fits within
559 // `Parser::max_size`. This doesn't matter for top-level modules
560 // but it is required for nested modules/components to correctly ensure
561 // that all sections live entirely within their section of the
562 // file.
563 let section_overflow = self
564 .max_size
565 .checked_sub(usize_to_u64(reader.position))
566 .and_then(|s| s.checked_sub(len.into()))
567 .is_none();
568 if section_overflow {
569 return Err(BinaryReaderError::new("section too large", len_pos));
570 }
571
572 match (self.encoding, id) {
573 // Sections for both modules and components.
574 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
575
576 // Module sections
577 (Encoding::Module, TYPE_SECTION) => {
578 section(reader, len, TypeSectionReader::new, TypeSection)
579 }
580 (Encoding::Module, IMPORT_SECTION) => {
581 section(reader, len, ImportSectionReader::new, ImportSection)
582 }
583 (Encoding::Module, FUNCTION_SECTION) => {
584 section(reader, len, FunctionSectionReader::new, FunctionSection)
585 }
586 (Encoding::Module, TABLE_SECTION) => {
587 section(reader, len, TableSectionReader::new, TableSection)
588 }
589 (Encoding::Module, MEMORY_SECTION) => {
590 section(reader, len, MemorySectionReader::new, MemorySection)
591 }
592 (Encoding::Module, GLOBAL_SECTION) => {
593 section(reader, len, GlobalSectionReader::new, GlobalSection)
594 }
595 (Encoding::Module, EXPORT_SECTION) => {
596 section(reader, len, ExportSectionReader::new, ExportSection)
597 }
598 (Encoding::Module, START_SECTION) => {
599 let (func, range) = single_item(reader, len, "start")?;
600 Ok(StartSection { func, range })
601 }
602 (Encoding::Module, ELEMENT_SECTION) => {
603 section(reader, len, ElementSectionReader::new, ElementSection)
604 }
605 (Encoding::Module, CODE_SECTION) => {
606 let start = reader.original_position();
607 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
608 let range = start..reader.original_position() + len as usize;
609 self.state = State::FunctionBody {
610 remaining: count,
611 len,
612 };
613 Ok(CodeSectionStart {
614 count,
615 range,
616 size: len,
617 })
618 }
619 (Encoding::Module, DATA_SECTION) => {
620 section(reader, len, DataSectionReader::new, DataSection)
621 }
622 (Encoding::Module, DATA_COUNT_SECTION) => {
623 let (count, range) = single_item(reader, len, "data count")?;
624 Ok(DataCountSection { count, range })
625 }
626 (Encoding::Module, TAG_SECTION) => {
627 section(reader, len, TagSectionReader::new, TagSection)
628 }
629
630 // Component sections
631 (Encoding::Component, COMPONENT_MODULE_SECTION)
632 | (Encoding::Component, COMPONENT_SECTION) => {
633 if len as usize > MAX_WASM_MODULE_SIZE {
634 bail!(
635 len_pos,
636 "{} section is too large",
637 if id == 1 { "module" } else { "component " }
638 );
639 }
640
641 let range =
642 reader.original_position()..reader.original_position() + len as usize;
643 self.max_size -= u64::from(len);
644 self.offset += u64::from(len);
645 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
646 parser.max_size = len.into();
647
648 Ok(match id {
649 1 => ModuleSection { parser, range },
650 4 => ComponentSection { parser, range },
651 _ => unreachable!(),
652 })
653 }
654 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
655 section(reader, len, InstanceSectionReader::new, InstanceSection)
656 }
657 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
658 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
659 }
660 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
661 reader,
662 len,
663 ComponentInstanceSectionReader::new,
664 ComponentInstanceSection,
665 ),
666 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
667 section(reader, len, SectionLimited::new, ComponentAliasSection)
668 }
669 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
670 reader,
671 len,
672 ComponentTypeSectionReader::new,
673 ComponentTypeSection,
674 ),
675 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
676 reader,
677 len,
678 ComponentCanonicalSectionReader::new,
679 ComponentCanonicalSection,
680 ),
681 (Encoding::Component, COMPONENT_START_SECTION) => {
682 let (start, range) = single_item(reader, len, "component start")?;
683 Ok(ComponentStartSection { start, range })
684 }
685 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
686 reader,
687 len,
688 ComponentImportSectionReader::new,
689 ComponentImportSection,
690 ),
691 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
692 reader,
693 len,
694 ComponentExportSectionReader::new,
695 ComponentExportSection,
696 ),
697 (_, id) => {
698 let offset = reader.original_position();
699 let contents = reader.read_bytes(len as usize)?;
700 let range = offset..offset + len as usize;
701 Ok(UnknownSection {
702 id,
703 contents,
704 range,
705 })
706 }
707 }
708 }
709
710 // Once we hit 0 remaining incrementally parsed items, with 0
711 // remaining bytes in each section, we're done and can switch back
712 // to parsing sections.
713 State::FunctionBody {
714 remaining: 0,
715 len: 0,
716 } => {
717 self.state = State::SectionStart;
718 self.parse_reader(reader, eof)
719 }
720
721 // ... otherwise trailing bytes with no remaining entries in these
722 // sections indicates an error.
723 State::FunctionBody { remaining: 0, len } => {
724 debug_assert!(len > 0);
725 let offset = reader.original_position();
726 Err(BinaryReaderError::new(
727 "trailing bytes at end of section",
728 offset,
729 ))
730 }
731
732 // Functions are relatively easy to parse when we know there's at
733 // least one remaining and at least one byte available to read
734 // things.
735 //
736 // We use the remaining length try to read a u32 size of the
737 // function, and using that size we require the entire function be
738 // resident in memory. This means that we're reading whole chunks of
739 // functions at a time.
740 //
741 // Limiting via `Parser::max_size` (nested parsing) happens above in
742 // `fn parse`, and limiting by our section size happens via
743 // `delimited`. Actual parsing of the function body is delegated to
744 // the caller to iterate over the `FunctionBody` structure.
745 State::FunctionBody { remaining, mut len } => {
746 let body = delimited(reader, &mut len, |r| {
747 let size = r.read_var_u32()?;
748 let offset = r.original_position();
749 Ok(FunctionBody::new(offset, r.read_bytes(size as usize)?))
750 })?;
751 self.state = State::FunctionBody {
752 remaining: remaining - 1,
753 len,
754 };
755 Ok(CodeSectionEntry(body))
756 }
757 }
758 }
759
760 /// Convenience function that can be used to parse a module or component
761 /// that is entirely resident in memory.
762 ///
763 /// This function will parse the `data` provided as a WebAssembly module
764 /// or component.
765 ///
766 /// Note that when this function yields sections that provide parsers,
767 /// no further action is required for those sections as payloads from
768 /// those parsers will be automatically returned.
769 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
770 let mut stack = Vec::new();
771 let mut cur = self;
772 let mut done = false;
773 iter::from_fn(move || {
774 if done {
775 return None;
776 }
777 let payload = match cur.parse(data, true) {
778 // Propagate all errors
779 Err(e) => {
780 done = true;
781 return Some(Err(e));
782 }
783
784 // This isn't possible because `eof` is always true.
785 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
786
787 Ok(Chunk::Parsed { payload, consumed }) => {
788 data = &data[consumed..];
789 payload
790 }
791 };
792
793 match &payload {
794 Payload::ModuleSection { parser, .. }
795 | Payload::ComponentSection { parser, .. } => {
796 stack.push(cur.clone());
797 cur = parser.clone();
798 }
799 Payload::End(_) => match stack.pop() {
800 Some(p) => cur = p,
801 None => done = true,
802 },
803
804 _ => {}
805 }
806
807 Some(Ok(payload))
808 })
809 }
810
811 /// Skip parsing the code section entirely.
812 ///
813 /// This function can be used to indicate, after receiving
814 /// `CodeSectionStart`, that the section will not be parsed.
815 ///
816 /// The caller will be responsible for skipping `size` bytes (found in the
817 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
818 /// after the `size` bytes have been skipped.
819 ///
820 /// # Panics
821 ///
822 /// This function will panic if the parser is not in a state where it's
823 /// parsing the code section.
824 ///
825 /// # Examples
826 ///
827 /// ```
828 /// use wasmparser_nostd::{Result, Parser, Chunk, Payload::*};
829 /// use ::core::ops::Range;
830 ///
831 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
832 /// let mut parser = Parser::new(0);
833 /// loop {
834 /// let payload = match parser.parse(wasm, true)? {
835 /// Chunk::Parsed { consumed, payload } => {
836 /// wasm = &wasm[consumed..];
837 /// payload
838 /// }
839 /// // this state isn't possible with `eof = true`
840 /// Chunk::NeedMoreData(_) => unreachable!(),
841 /// };
842 /// match payload {
843 /// TypeSection(s) => print_range("type section", &s.range()),
844 /// ImportSection(s) => print_range("import section", &s.range()),
845 /// // .. other sections
846 ///
847 /// // Print the range of the code section we see, but don't
848 /// // actually iterate over each individual function.
849 /// CodeSectionStart { range, size, .. } => {
850 /// print_range("code section", &range);
851 /// parser.skip_section();
852 /// wasm = &wasm[size as usize..];
853 /// }
854 /// End(_) => break,
855 /// _ => {}
856 /// }
857 /// }
858 /// Ok(())
859 /// }
860 ///
861 /// fn print_range(section: &str, range: &Range<usize>) {
862 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
863 /// }
864 /// ```
865 pub fn skip_section(&mut self) {
866 let skip = match self.state {
867 State::FunctionBody { remaining: _, len } => len,
868 _ => panic!("wrong state to call `skip_section`"),
869 };
870 self.offset += u64::from(skip);
871 self.max_size -= u64::from(skip);
872 self.state = State::SectionStart;
873 }
874}
875
/// Widens a `usize` to a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    let widened: u64 = a.try_into().unwrap();
    widened
}
879
880/// Parses an entire section resident in memory into a `Payload`.
881///
882/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
883/// to construct the section to return.
884fn section<'a, T>(
885 reader: &mut BinaryReader<'a>,
886 len: u32,
887 ctor: fn(&'a [u8], usize) -> Result<T>,
888 variant: fn(T) -> Payload<'a>,
889) -> Result<Payload<'a>> {
890 let offset = reader.original_position();
891 let payload = reader.read_bytes(len as usize)?;
892 // clear the hint for "need this many more bytes" here because we already
893 // read all the bytes, so it's not possible to read more bytes if this
894 // fails.
895 let reader = ctor(payload, offset).map_err(clear_hint)?;
896 Ok(variant(reader))
897}
898
899/// Reads a section that is represented by a single uleb-encoded `u32`.
900fn single_item<'a, T>(
901 reader: &mut BinaryReader<'a>,
902 len: u32,
903 desc: &str,
904) -> Result<(T, Range<usize>)>
905where
906 T: FromReader<'a>,
907{
908 let range = reader.original_position()..reader.original_position() + len as usize;
909 let mut content = BinaryReader::new_with_offset(reader.read_bytes(len as usize)?, range.start);
910 // We can't recover from "unexpected eof" here because our entire section is
911 // already resident in memory, so clear the hint for how many more bytes are
912 // expected.
913 let ret = content.read().map_err(clear_hint)?;
914 if !content.eof() {
915 bail!(
916 content.original_position(),
917 "unexpected content in the {desc} section",
918 );
919 }
920 Ok((ret, range))
921}
922
923/// Attempts to parse using `f`.
924///
925/// This will update `*len` with the number of bytes consumed, and it will cause
926/// a failure to be returned instead of the number of bytes consumed exceeds
927/// what `*len` currently is.
928fn delimited<'a, T>(
929 reader: &mut BinaryReader<'a>,
930 len: &mut u32,
931 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
932) -> Result<T> {
933 let start = reader.position;
934 let ret = f(reader)?;
935 *len = match (reader.position - start)
936 .try_into()
937 .ok()
938 .and_then(|i| len.checked_sub(i))
939 {
940 Some(i) => i,
941 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
942 };
943 Ok(ret)
944}
945
946impl Default for Parser {
947 fn default() -> Parser {
948 Parser::new(0)
949 }
950}
951
952impl Payload<'_> {
953 /// If this `Payload` represents a section in the original wasm module then
954 /// the section's id and range within the original wasm binary are returned.
955 ///
956 /// Not all payloads refer to entire sections, such as the `Version` and
957 /// `CodeSectionEntry` variants. These variants will return `None` from this
958 /// function.
959 ///
960 /// Otherwise this function will return `Some` where the first element is
961 /// the byte identifier for the section and the second element is the range
962 /// of the contents of the section within the original wasm binary.
963 ///
964 /// The purpose of this method is to enable tools to easily iterate over
965 /// entire sections if necessary and handle sections uniformly, for example
966 /// dropping custom sections while preserving all other sections.
967 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
968 use Payload::*;
969
970 match self {
971 Version { .. } => None,
972 TypeSection(s) => Some((TYPE_SECTION, s.range())),
973 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
974 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
975 TableSection(s) => Some((TABLE_SECTION, s.range())),
976 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
977 TagSection(s) => Some((TAG_SECTION, s.range())),
978 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
979 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
980 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
981 DataSection(s) => Some((DATA_SECTION, s.range())),
982 StartSection { range, .. } => Some((START_SECTION, range.clone())),
983 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
984 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
985 CodeSectionEntry(_) => None,
986
987 ModuleSection { range, .. } => Some((COMPONENT_MODULE_SECTION, range.clone())),
988 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
989 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
990 ComponentSection { range, .. } => Some((COMPONENT_SECTION, range.clone())),
991 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
992 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
993 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
994 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
995 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
996 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
997 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
998
999 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1000
1001 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1002
1003 End(_) => None,
1004 }
1005 }
1006}
1007
1008impl fmt::Debug for Payload<'_> {
1009 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1010 use Payload::*;
1011 match self {
1012 Version {
1013 num,
1014 encoding,
1015 range,
1016 } => f
1017 .debug_struct("Version")
1018 .field("num", num)
1019 .field("encoding", encoding)
1020 .field("range", range)
1021 .finish(),
1022
1023 // Module sections
1024 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1025 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1026 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1027 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1028 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1029 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1030 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1031 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1032 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1033 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1034 StartSection { func, range } => f
1035 .debug_struct("StartSection")
1036 .field("func", func)
1037 .field("range", range)
1038 .finish(),
1039 DataCountSection { count, range } => f
1040 .debug_struct("DataCountSection")
1041 .field("count", count)
1042 .field("range", range)
1043 .finish(),
1044 CodeSectionStart { count, range, size } => f
1045 .debug_struct("CodeSectionStart")
1046 .field("count", count)
1047 .field("range", range)
1048 .field("size", size)
1049 .finish(),
1050 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1051
1052 // Component sections
1053 ModuleSection { parser: _, range } => f
1054 .debug_struct("ModuleSection")
1055 .field("range", range)
1056 .finish(),
1057 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1058 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1059 ComponentSection { parser: _, range } => f
1060 .debug_struct("ComponentSection")
1061 .field("range", range)
1062 .finish(),
1063 ComponentInstanceSection(_) => f
1064 .debug_tuple("ComponentInstanceSection")
1065 .field(&"...")
1066 .finish(),
1067 ComponentAliasSection(_) => f
1068 .debug_tuple("ComponentAliasSection")
1069 .field(&"...")
1070 .finish(),
1071 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1072 ComponentCanonicalSection(_) => f
1073 .debug_tuple("ComponentCanonicalSection")
1074 .field(&"...")
1075 .finish(),
1076 ComponentStartSection { .. } => f
1077 .debug_tuple("ComponentStartSection")
1078 .field(&"...")
1079 .finish(),
1080 ComponentImportSection(_) => f
1081 .debug_tuple("ComponentImportSection")
1082 .field(&"...")
1083 .finish(),
1084 ComponentExportSection(_) => f
1085 .debug_tuple("ComponentExportSection")
1086 .field(&"...")
1087 .finish(),
1088
1089 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1090
1091 UnknownSection { id, range, .. } => f
1092 .debug_struct("UnknownSection")
1093 .field("id", id)
1094 .field("range", range)
1095 .finish(),
1096
1097 End(offset) => f.debug_tuple("End").field(offset).finish(),
1098 }
1099 }
1100}
1101
1102fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1103 err.inner.needed_hint = None;
1104 err
1105}
1106
1107#[cfg(test)]
1108mod tests {
1109 use super::*;
1110
1111 macro_rules! assert_matches {
1112 ($a:expr, $b:pat $(,)?) => {
1113 match $a {
1114 $b => {}
1115 a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1116 }
1117 };
1118 }
1119
1120 #[test]
1121 fn header() {
1122 assert!(Parser::default().parse(&[], true).is_err());
1123 assert_matches!(
1124 Parser::default().parse(&[], false),
1125 Ok(Chunk::NeedMoreData(4)),
1126 );
1127 assert_matches!(
1128 Parser::default().parse(b"\0", false),
1129 Ok(Chunk::NeedMoreData(3)),
1130 );
1131 assert_matches!(
1132 Parser::default().parse(b"\0asm", false),
1133 Ok(Chunk::NeedMoreData(4)),
1134 );
1135 assert_matches!(
1136 Parser::default().parse(b"\0asm\x01\0\0\0", false),
1137 Ok(Chunk::Parsed {
1138 consumed: 8,
1139 payload: Payload::Version { num: 1, .. },
1140 }),
1141 );
1142 }
1143
1144 #[test]
1145 fn header_iter() {
1146 for _ in Parser::default().parse_all(&[]) {}
1147 for _ in Parser::default().parse_all(b"\0") {}
1148 for _ in Parser::default().parse_all(b"\0asm") {}
1149 for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1150 }
1151
1152 fn parser_after_header() -> Parser {
1153 let mut p = Parser::default();
1154 assert_matches!(
1155 p.parse(b"\0asm\x01\0\0\0", false),
1156 Ok(Chunk::Parsed {
1157 consumed: 8,
1158 payload: Payload::Version {
1159 num: WASM_MODULE_VERSION,
1160 encoding: Encoding::Module,
1161 ..
1162 },
1163 }),
1164 );
1165 p
1166 }
1167
1168 fn parser_after_component_header() -> Parser {
1169 let mut p = Parser::default();
1170 assert_matches!(
1171 p.parse(b"\0asm\x0c\0\x01\0", false),
1172 Ok(Chunk::Parsed {
1173 consumed: 8,
1174 payload: Payload::Version {
1175 num: WASM_COMPONENT_VERSION,
1176 encoding: Encoding::Component,
1177 ..
1178 },
1179 }),
1180 );
1181 p
1182 }
1183
1184 #[test]
1185 fn start_section() {
1186 assert_matches!(
1187 parser_after_header().parse(&[], false),
1188 Ok(Chunk::NeedMoreData(1)),
1189 );
1190 assert!(parser_after_header().parse(&[8], true).is_err());
1191 assert!(parser_after_header().parse(&[8, 1], true).is_err());
1192 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1193 assert_matches!(
1194 parser_after_header().parse(&[8], false),
1195 Ok(Chunk::NeedMoreData(1)),
1196 );
1197 assert_matches!(
1198 parser_after_header().parse(&[8, 1], false),
1199 Ok(Chunk::NeedMoreData(1)),
1200 );
1201 assert_matches!(
1202 parser_after_header().parse(&[8, 2], false),
1203 Ok(Chunk::NeedMoreData(2)),
1204 );
1205 assert_matches!(
1206 parser_after_header().parse(&[8, 1, 1], false),
1207 Ok(Chunk::Parsed {
1208 consumed: 3,
1209 payload: Payload::StartSection { func: 1, .. },
1210 }),
1211 );
1212 assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1213 assert!(parser_after_header().parse(&[8, 0], false).is_err());
1214 }
1215
1216 #[test]
1217 fn end_works() {
1218 assert_matches!(
1219 parser_after_header().parse(&[], true),
1220 Ok(Chunk::Parsed {
1221 consumed: 0,
1222 payload: Payload::End(8),
1223 }),
1224 );
1225 }
1226
1227 #[test]
1228 fn type_section() {
1229 assert!(parser_after_header().parse(&[1], true).is_err());
1230 assert!(parser_after_header().parse(&[1, 0], false).is_err());
1231 assert!(parser_after_header().parse(&[8, 2], true).is_err());
1232 assert_matches!(
1233 parser_after_header().parse(&[1], false),
1234 Ok(Chunk::NeedMoreData(1)),
1235 );
1236 assert_matches!(
1237 parser_after_header().parse(&[1, 1], false),
1238 Ok(Chunk::NeedMoreData(1)),
1239 );
1240 assert_matches!(
1241 parser_after_header().parse(&[1, 1, 1], false),
1242 Ok(Chunk::Parsed {
1243 consumed: 3,
1244 payload: Payload::TypeSection(_),
1245 }),
1246 );
1247 assert_matches!(
1248 parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1249 Ok(Chunk::Parsed {
1250 consumed: 3,
1251 payload: Payload::TypeSection(_),
1252 }),
1253 );
1254 }
1255
1256 #[test]
1257 fn custom_section() {
1258 assert!(parser_after_header().parse(&[0], true).is_err());
1259 assert!(parser_after_header().parse(&[0, 0], false).is_err());
1260 assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1261 assert_matches!(
1262 parser_after_header().parse(&[0, 2, 1], false),
1263 Ok(Chunk::NeedMoreData(1)),
1264 );
1265 assert_matches!(
1266 parser_after_header().parse(&[0, 1, 0], false),
1267 Ok(Chunk::Parsed {
1268 consumed: 3,
1269 payload: Payload::CustomSection(CustomSectionReader {
1270 name: "",
1271 data_offset: 11,
1272 data: b"",
1273 range: Range { start: 10, end: 11 },
1274 }),
1275 }),
1276 );
1277 assert_matches!(
1278 parser_after_header().parse(&[0, 2, 1, b'a'], false),
1279 Ok(Chunk::Parsed {
1280 consumed: 4,
1281 payload: Payload::CustomSection(CustomSectionReader {
1282 name: "a",
1283 data_offset: 12,
1284 data: b"",
1285 range: Range { start: 10, end: 12 },
1286 }),
1287 }),
1288 );
1289 assert_matches!(
1290 parser_after_header().parse(&[0, 2, 0, b'a'], false),
1291 Ok(Chunk::Parsed {
1292 consumed: 4,
1293 payload: Payload::CustomSection(CustomSectionReader {
1294 name: "",
1295 data_offset: 11,
1296 data: b"a",
1297 range: Range { start: 10, end: 12 },
1298 }),
1299 }),
1300 );
1301 }
1302
1303 #[test]
1304 fn function_section() {
1305 assert!(parser_after_header().parse(&[10], true).is_err());
1306 assert!(parser_after_header().parse(&[10, 0], true).is_err());
1307 assert!(parser_after_header().parse(&[10, 1], true).is_err());
1308 assert_matches!(
1309 parser_after_header().parse(&[10], false),
1310 Ok(Chunk::NeedMoreData(1))
1311 );
1312 assert_matches!(
1313 parser_after_header().parse(&[10, 1], false),
1314 Ok(Chunk::NeedMoreData(1))
1315 );
1316 let mut p = parser_after_header();
1317 assert_matches!(
1318 p.parse(&[10, 1, 0], false),
1319 Ok(Chunk::Parsed {
1320 consumed: 3,
1321 payload: Payload::CodeSectionStart { count: 0, .. },
1322 }),
1323 );
1324 assert_matches!(
1325 p.parse(&[], true),
1326 Ok(Chunk::Parsed {
1327 consumed: 0,
1328 payload: Payload::End(11),
1329 }),
1330 );
1331 let mut p = parser_after_header();
1332 assert_matches!(
1333 p.parse(&[10, 2, 1, 0], false),
1334 Ok(Chunk::Parsed {
1335 consumed: 3,
1336 payload: Payload::CodeSectionStart { count: 1, .. },
1337 }),
1338 );
1339 assert_matches!(
1340 p.parse(&[0], false),
1341 Ok(Chunk::Parsed {
1342 consumed: 1,
1343 payload: Payload::CodeSectionEntry(_),
1344 }),
1345 );
1346 assert_matches!(
1347 p.parse(&[], true),
1348 Ok(Chunk::Parsed {
1349 consumed: 0,
1350 payload: Payload::End(12),
1351 }),
1352 );
1353
1354 // 1 byte section with 1 function can't read the function body because
1355 // the section is too small
1356 let mut p = parser_after_header();
1357 assert_matches!(
1358 p.parse(&[10, 1, 1], false),
1359 Ok(Chunk::Parsed {
1360 consumed: 3,
1361 payload: Payload::CodeSectionStart { count: 1, .. },
1362 }),
1363 );
1364 assert_eq!(
1365 p.parse(&[0], false).unwrap_err().message(),
1366 "unexpected end-of-file"
1367 );
1368
1369 // section with 2 functions but section is cut off
1370 let mut p = parser_after_header();
1371 assert_matches!(
1372 p.parse(&[10, 2, 2], false),
1373 Ok(Chunk::Parsed {
1374 consumed: 3,
1375 payload: Payload::CodeSectionStart { count: 2, .. },
1376 }),
1377 );
1378 assert_matches!(
1379 p.parse(&[0], false),
1380 Ok(Chunk::Parsed {
1381 consumed: 1,
1382 payload: Payload::CodeSectionEntry(_),
1383 }),
1384 );
1385 assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1386 assert_eq!(
1387 p.parse(&[0], false).unwrap_err().message(),
1388 "unexpected end-of-file",
1389 );
1390
1391 // trailing data is bad
1392 let mut p = parser_after_header();
1393 assert_matches!(
1394 p.parse(&[10, 3, 1], false),
1395 Ok(Chunk::Parsed {
1396 consumed: 3,
1397 payload: Payload::CodeSectionStart { count: 1, .. },
1398 }),
1399 );
1400 assert_matches!(
1401 p.parse(&[0], false),
1402 Ok(Chunk::Parsed {
1403 consumed: 1,
1404 payload: Payload::CodeSectionEntry(_),
1405 }),
1406 );
1407 assert_eq!(
1408 p.parse(&[0], false).unwrap_err().message(),
1409 "trailing bytes at end of section",
1410 );
1411 }
1412
1413 #[test]
1414 fn single_module() {
1415 let mut p = parser_after_component_header();
1416 assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1417
1418 // A module that's 8 bytes in length
1419 let mut sub = match p.parse(&[1, 8], false) {
1420 Ok(Chunk::Parsed {
1421 consumed: 2,
1422 payload: Payload::ModuleSection { parser, .. },
1423 }) => parser,
1424 other => panic!("bad parse {:?}", other),
1425 };
1426
1427 // Parse the header of the submodule with the sub-parser.
1428 assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1429 assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1430 assert_matches!(
1431 sub.parse(b"\0asm\x01\0\0\0", false),
1432 Ok(Chunk::Parsed {
1433 consumed: 8,
1434 payload: Payload::Version {
1435 num: 1,
1436 encoding: Encoding::Module,
1437 ..
1438 },
1439 }),
1440 );
1441
1442 // The sub-parser should be byte-limited so the next byte shouldn't get
1443 // consumed, it's intended for the parent parser.
1444 assert_matches!(
1445 sub.parse(&[10], false),
1446 Ok(Chunk::Parsed {
1447 consumed: 0,
1448 payload: Payload::End(18),
1449 }),
1450 );
1451
1452 // The parent parser should now be back to resuming, and we simulate it
1453 // being done with bytes to ensure that it's safely at the end,
1454 // completing the module code section.
1455 assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1456 assert_matches!(
1457 p.parse(&[], true),
1458 Ok(Chunk::Parsed {
1459 consumed: 0,
1460 payload: Payload::End(18),
1461 }),
1462 );
1463 }
1464
1465 #[test]
1466 fn nested_section_too_big() {
1467 let mut p = parser_after_component_header();
1468
1469 // A module that's 10 bytes in length
1470 let mut sub = match p.parse(&[1, 10], false) {
1471 Ok(Chunk::Parsed {
1472 consumed: 2,
1473 payload: Payload::ModuleSection { parser, .. },
1474 }) => parser,
1475 other => panic!("bad parse {:?}", other),
1476 };
1477
1478 // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1479 // module.
1480 assert_matches!(
1481 sub.parse(b"\0asm\x01\0\0\0", false),
1482 Ok(Chunk::Parsed {
1483 consumed: 8,
1484 payload: Payload::Version { num: 1, .. },
1485 }),
1486 );
1487
1488 // We can't parse a section which declares its bigger than the outer
1489 // module. This is a custom section, one byte big, with one content byte. The
1490 // content byte, however, lives outside of the parent's module code
1491 // section.
1492 assert_eq!(
1493 sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1494 "section too large",
1495 );
1496 }
1497}