wat/
lib.rs

1//! A Rust parser for the [WebAssembly Text format][wat]
2//!
3//! This crate contains a stable interface to the parser for the [WAT][wat]
4//! format of WebAssembly text files. The format parsed by this crate follows
5//! the [online specification][wat].
6//!
7//! # Examples
8//!
9//! Parse an in-memory string:
10//!
11//! ```
12//! # fn foo() -> wat::Result<()> {
13//! let wat = r#"
14//!     (module
15//!         (func $foo)
16//!
17//!         (func (export "bar")
18//!             call $foo
19//!         )
20//!     )
21//! "#;
22//!
23//! let binary = wat::parse_str(wat)?;
24//! // ...
25//! # Ok(())
26//! # }
27//! ```
28//!
29//! Parse an on-disk file:
30//!
31//! ```
32//! # fn foo() -> wat::Result<()> {
33//! let binary = wat::parse_file("./foo.wat")?;
34//! // ...
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! ## Evolution of the WAT Format
40//!
41//! WebAssembly, and the WAT format, are an evolving specification. Features are
42//! added to WAT, WAT changes, and sometimes WAT breaks. The policy of this
43//! crate is that it will always follow the [official specification][wat] for
44//! WAT files.
45//!
46//! Future WebAssembly features will be accepted to this parser **and they will
47//! not require a feature gate to opt-in**. All implemented WebAssembly features
48//! will be enabled at all times. Using a future WebAssembly feature in the WAT
49//! format may cause breakage because while specifications are in development
50//! the WAT syntax (and/or binary encoding) will often change. This crate will
51//! do its best to keep up with these proposals, but breaking textual changes
52//! will be published as non-breaking semver changes to this crate.
53//!
54//! ## Stability
55//!
//! This crate is intended to be a very stable shim over the `wast` crate
//! which is expected to be much more unstable. The `wast` crate contains
//! AST data structures for parsing `*.wat` files and they will evolve as the
//! WAT and WebAssembly specifications evolve over time.
60//!
61//! This crate is currently at version 1.x.y, and it is intended that it will
62//! remain here for quite some time. Breaking changes to the WAT format will be
63//! landed as a non-semver-breaking version change in this crate. This crate
64//! will always follow the [official specification for WAT][wat].
65//!
66//! [wat]: http://webassembly.github.io/spec/core/text/index.html
67
68#![deny(missing_docs)]
69#![cfg_attr(docsrs, feature(doc_auto_cfg))]
70
71use std::borrow::Cow;
72use std::fmt;
73use std::path::{Path, PathBuf};
74use std::str;
75use wast::core::EncodeOptions;
76use wast::lexer::{Lexer, TokenKind};
77use wast::parser::{self, ParseBuffer};
78
79#[doc(inline)]
80pub use wast::core::GenerateDwarf;
81
82/// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary
83/// WebAssembly file
84///
85/// This function will read the bytes on disk and delegate them to the
86/// [`parse_bytes`] function. For more information on the behavior of parsing
87/// see [`parse_bytes`].
88///
89/// # Errors
90///
91/// For information about errors, see the [`parse_bytes`] documentation.
92///
93/// # Examples
94///
95/// ```
96/// # fn foo() -> wat::Result<()> {
97/// let binary = wat::parse_file("./foo.wat")?;
98/// // ...
99/// # Ok(())
100/// # }
101/// ```
102///
103/// [wat]: http://webassembly.github.io/spec/core/text/index.html
104pub fn parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>> {
105    Parser::new().parse_file(file)
106}
107
108/// Parses in-memory bytes as either the [WebAssembly Text format][wat], or a
109/// binary WebAssembly module.
110///
111/// This function will attempt to interpret the given bytes as one of two
112/// options:
113///
114/// * A utf-8 string which is a `*.wat` file to be parsed.
115/// * A binary WebAssembly file starting with `b"\0asm"`
116///
117/// If the input is a string then it will be parsed as `*.wat`, and then after
118/// parsing it will be encoded back into a WebAssembly binary module. If the
119/// input is a binary that starts with `b"\0asm"` it will be returned verbatim.
120/// Everything that doesn't start with `b"\0asm"` will be parsed as a utf-8
121/// `*.wat` file, returning errors as appropriate.
122///
123/// For more information about parsing wat files, see [`parse_str`].
124///
125/// # Errors
126///
127/// In addition to all of the errors that can be returned from [`parse_str`],
128/// this function will also return an error if the input does not start with
129/// `b"\0asm"` and is invalid utf-8. (failed to even try to call [`parse_str`]).
130///
131/// # Examples
132///
133/// ```
134/// # fn foo() -> wat::Result<()> {
135/// // Parsing bytes that are actually `*.wat` files
136/// assert_eq!(&*wat::parse_bytes(b"(module)")?, b"\0asm\x01\0\0\0");
137/// assert!(wat::parse_bytes(b"module").is_err());
138/// assert!(wat::parse_bytes(b"binary\0file\0\that\0is\0not\0wat").is_err());
139///
140/// // Pass through binaries that look like real wasm files
141/// assert_eq!(&*wat::parse_bytes(b"\0asm\x01\0\0\0")?, b"\0asm\x01\0\0\0");
142/// # Ok(())
143/// # }
144/// ```
145///
146/// [wat]: http://webassembly.github.io/spec/core/text/index.html
147pub fn parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>> {
148    Parser::new().parse_bytes(None, bytes)
149}
150
151/// Parses an in-memory string as the [WebAssembly Text format][wat], returning
152/// the file as a binary WebAssembly file.
153///
154/// This function is intended to be a stable convenience function for parsing a
155/// wat file into a WebAssembly binary file. This is a high-level operation
156/// which does not expose any parsing internals, for that you'll want to use the
157/// `wast` crate.
158///
159/// # Errors
160///
161/// This function can fail for a number of reasons, including (but not limited
162/// to):
163///
164/// * The `wat` input may fail to lex, such as having invalid tokens or syntax
165/// * The `wat` input may fail to parse, such as having incorrect syntactical
166///   structure
167/// * The `wat` input may contain names that could not be resolved
168///
169/// # Examples
170///
171/// ```
172/// # fn foo() -> wat::Result<()> {
173/// assert_eq!(wat::parse_str("(module)")?, b"\0asm\x01\0\0\0");
174/// assert!(wat::parse_str("module").is_err());
175///
176/// let wat = r#"
177///     (module
178///         (func $foo)
179///
180///         (func (export "bar")
181///             call $foo
182///         )
183///     )
184/// "#;
185///
186/// let binary = wat::parse_str(wat)?;
187/// // ...
188/// # Ok(())
189/// # }
190/// ```
191///
192/// [wat]: http://webassembly.github.io/spec/core/text/index.html
193pub fn parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>> {
194    Parser::default().parse_str(None, wat)
195}
196
197/// Parser configuration for transforming bytes into WebAssembly binaries.
198#[derive(Default)]
199pub struct Parser {
200    #[cfg(feature = "dwarf")]
201    generate_dwarf: Option<GenerateDwarf>,
202    _private: (),
203}
204
205impl Parser {
206    /// Creates a new parser with th default settings.
207    pub fn new() -> Parser {
208        Parser::default()
209    }
210
211    /// Indicates that DWARF debugging information should be generated and
212    /// emitted by default.
213    ///
214    /// Note that DWARF debugging information is only emitted for textual-based
215    /// modules. For example if a WebAssembly binary is parsed via
216    /// [`Parser::parse_bytes`] this won't insert new DWARF information in such
217    /// a binary. Additionally if the text format used the `(module binary ...)`
218    /// form then no DWARF information will be emitted.
219    #[cfg(feature = "dwarf")]
220    pub fn generate_dwarf(&mut self, generate: GenerateDwarf) -> &mut Self {
221        self.generate_dwarf = Some(generate);
222        self
223    }
224
225    /// Equivalent of [`parse_file`] but uses this parser's settings.
226    pub fn parse_file(&self, path: impl AsRef<Path>) -> Result<Vec<u8>> {
227        self._parse_file(path.as_ref())
228    }
229
230    fn _parse_file(&self, file: &Path) -> Result<Vec<u8>> {
231        let contents = std::fs::read(file).map_err(|err| Error {
232            kind: Box::new(ErrorKind::Io {
233                err,
234                file: Some(file.to_owned()),
235            }),
236        })?;
237        match self.parse_bytes(Some(file), &contents) {
238            // If the result here is borrowed then that means that the input
239            // `&contents` was itself already a wasm module. We've already got
240            // an owned copy of that so return `contents` directly after
241            // double-checking it is indeed the same as the `bytes` return value
242            // here. That helps avoid a copy of `bytes` via something like
243            // `Cow::to_owned` which would otherwise copy the bytes.
244            Ok(Cow::Borrowed(bytes)) => {
245                assert_eq!(bytes.len(), contents.len());
246                assert_eq!(bytes.as_ptr(), contents.as_ptr());
247                Ok(contents)
248            }
249            Ok(Cow::Owned(bytes)) => Ok(bytes),
250            Err(mut e) => {
251                e.set_path(file);
252                Err(e)
253            }
254        }
255    }
256
257    /// Equivalent of [`parse_bytes`] but uses this parser's settings.
258    ///
259    /// The `path` argument is an optional path to use when error messages are
260    /// generated.
261    pub fn parse_bytes<'a>(&self, path: Option<&Path>, bytes: &'a [u8]) -> Result<Cow<'a, [u8]>> {
262        if bytes.starts_with(b"\0asm") {
263            return Ok(bytes.into());
264        }
265        match str::from_utf8(bytes) {
266            Ok(s) => self._parse_str(path, s).map(|s| s.into()),
267            Err(_) => Err(Error {
268                kind: Box::new(ErrorKind::Custom {
269                    msg: "input bytes aren't valid utf-8".to_string(),
270                    file: path.map(|p| p.to_owned()),
271                }),
272            }),
273        }
274    }
275
276    /// Equivalent of [`parse_str`] but uses this parser's settings.
277    ///
278    /// The `path` argument is an optional path to use when error messages are
279    /// generated.
280    pub fn parse_str(&self, path: Option<&Path>, wat: impl AsRef<str>) -> Result<Vec<u8>> {
281        self._parse_str(path, wat.as_ref())
282    }
283
284    fn _parse_str(&self, path: Option<&Path>, wat: &str) -> Result<Vec<u8>> {
285        let mut _buf = ParseBuffer::new(wat).map_err(|e| Error::cvt(e, wat, path))?;
286        #[cfg(feature = "dwarf")]
287        _buf.track_instr_spans(self.generate_dwarf.is_some());
288        let mut ast = parser::parse::<wast::Wat>(&_buf).map_err(|e| Error::cvt(e, wat, path))?;
289
290        let mut _opts = EncodeOptions::default();
291        #[cfg(feature = "dwarf")]
292        if let Some(style) = self.generate_dwarf {
293            _opts.dwarf(path.unwrap_or("<input>.wat".as_ref()), wat, style);
294        }
295        _opts
296            .encode_wat(&mut ast)
297            .map_err(|e| Error::cvt(e, wat, path))
298    }
299}
300
301/// Result of [`Detect::from_bytes`] to indicate what some input bytes look
302/// like.
303#[derive(Debug, PartialEq, Eq, Clone, Copy)]
304pub enum Detect {
305    /// The input bytes look like the WebAssembly text format.
306    WasmText,
307    /// The input bytes look like the WebAssembly binary format.
308    WasmBinary,
309    /// The input bytes don't look like WebAssembly at all.
310    Unknown,
311}
312
313impl Detect {
314    /// Detect quickly if supplied bytes represent a Wasm module,
315    /// whether binary encoded or in WAT-encoded.
316    ///
317    /// This briefly lexes past whitespace and comments as a `*.wat` file to see if
318    /// we can find a left-paren. If that fails then it's probably `*.wit` instead.
319    ///
320    ///
321    /// Examples
322    /// ```
323    /// use wat::Detect;
324    ///
325    /// assert_eq!(Detect::from_bytes(r#"
326    /// (module
327    ///   (type (;0;) (func))
328    ///   (func (;0;) (type 0)
329    ///     nop
330    ///   )
331    /// )
332    /// "#), Detect::WasmText);
333    /// ```
334    pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Detect {
335        if bytes.as_ref().starts_with(b"\0asm") {
336            return Detect::WasmBinary;
337        }
338        let text = match std::str::from_utf8(bytes.as_ref()) {
339            Ok(s) => s,
340            Err(_) => return Detect::Unknown,
341        };
342
343        let lexer = Lexer::new(text);
344        let mut iter = lexer.iter(0);
345
346        while let Some(next) = iter.next() {
347            match next.map(|t| t.kind) {
348                Ok(TokenKind::Whitespace)
349                | Ok(TokenKind::BlockComment)
350                | Ok(TokenKind::LineComment) => {}
351                Ok(TokenKind::LParen) => return Detect::WasmText,
352                _ => break,
353            }
354        }
355
356        Detect::Unknown
357    }
358
359    /// Returns whether this is either binary or textual wasm.
360    pub fn is_wasm(&self) -> bool {
361        match self {
362            Detect::WasmText | Detect::WasmBinary => true,
363            Detect::Unknown => false,
364        }
365    }
366}
367
/// A convenience type definition for `Result` where the error is [`Error`].
///
/// This is the return type used by the fallible functions of this crate.
pub type Result<T> = std::result::Result<T, Error>;
370
371/// Errors from this crate related to parsing WAT files
372///
373/// An error can during example phases like:
374///
375/// * Lexing can fail if the document is syntactically invalid.
376/// * A string may not be utf-8
377/// * The syntactical structure of the wat file may be invalid
378/// * The wat file may be semantically invalid such as having name resolution
379///   failures
380#[derive(Debug)]
381pub struct Error {
382    kind: Box<ErrorKind>,
383}
384
385#[derive(Debug)]
386enum ErrorKind {
387    Wast(wast::Error),
388    Io {
389        err: std::io::Error,
390        file: Option<PathBuf>,
391    },
392    Custom {
393        msg: String,
394        file: Option<PathBuf>,
395    },
396}
397
398impl Error {
399    fn cvt<E: Into<wast::Error>>(e: E, contents: &str, path: Option<&Path>) -> Error {
400        let mut err = e.into();
401        if let Some(path) = path {
402            err.set_path(path);
403        }
404        err.set_text(contents);
405        Error {
406            kind: Box::new(ErrorKind::Wast(err)),
407        }
408    }
409
410    /// To provide a more useful error this function can be used to set
411    /// the file name that this error is associated with.
412    ///
413    /// The `file` here will be stored in this error and later rendered in the
414    /// `Display` implementation.
415    pub fn set_path<P: AsRef<Path>>(&mut self, file: P) {
416        let file = file.as_ref();
417        match &mut *self.kind {
418            ErrorKind::Wast(e) => e.set_path(file),
419            ErrorKind::Custom { file: f, .. } => *f = Some(file.to_owned()),
420            ErrorKind::Io { file: f, .. } => *f = Some(file.to_owned()),
421        }
422    }
423}
424
425impl fmt::Display for Error {
426    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
427        match &*self.kind {
428            ErrorKind::Wast(err) => err.fmt(f),
429            ErrorKind::Custom { msg, file, .. } => match file {
430                Some(file) => {
431                    write!(f, "failed to parse `{}`: {}", file.display(), msg)
432                }
433                None => msg.fmt(f),
434            },
435            ErrorKind::Io { err, file, .. } => match file {
436                Some(file) => {
437                    write!(f, "failed to read from `{}`", file.display())
438                }
439                None => err.fmt(f),
440            },
441        }
442    }
443}
444
445impl std::error::Error for Error {
446    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
447        match &*self.kind {
448            ErrorKind::Wast(_) => None,
449            ErrorKind::Custom { .. } => None,
450            ErrorKind::Io { err, .. } => Some(err),
451        }
452    }
453}
454
#[cfg(test)]
mod test {
    use super::*;

    /// Checks how a path shows up in the rendered message for each kind of
    /// error: invalid utf-8, a failed file read, and a parse failure.
    #[test]
    fn test_set_path() {
        // Invalid utf-8 yields a custom error that gets prefixed by the path.
        let mut utf8_error = parse_bytes(&[0xFF]).unwrap_err();
        utf8_error.set_path("foo");
        let rendered = utf8_error.to_string();
        assert_eq!(
            rendered,
            "failed to parse `foo`: input bytes aren't valid utf-8"
        );

        // A missing file yields an I/O error that already names the path.
        let io_error = parse_file("_does_not_exist_").unwrap_err();
        let rendered = io_error.to_string();
        assert!(rendered.starts_with("failed to read from `_does_not_exist_`"));

        // A parse failure is rendered by `wast` with the path in its location.
        let mut parse_error = parse_bytes(b"()").unwrap_err();
        parse_error.set_path("foo");
        let rendered = parse_error.to_string();
        assert_eq!(
            rendered,
            "expected valid module field\n     --> foo:1:2\n      |\n    1 | ()\n      |  ^"
        );
    }
}
481}