wasm2map/
lib.rs

1#![warn(missing_docs)]
2//! Having a sourcemap associated with your WASM file allows seeing the exact
3//! filename, the line number and character position right in the browser or
4//! supporting debugger. This can speed up tracing errors back to their source,
5//! make sense of panic unwinds right in the browser and even simple console
6//! messages are immediately identifiable without external post processing.
7//!
//! It also offers an opportunity to debug the WASM binary, set breakpoints and
//! overall get the same developer experience JavaScript has had in modern
//! browsers for ages.
11//!
12//! Inspirations:
13//! * [wasm_sourcemap.py](https://github.com/emscripten-core/emscripten/blob/main/tools/wasm-sourcemap.py) by the Emscripten Team
14//! * [WebAssembly Debugging](https://medium.com/oasislabs/webassembly-debugging-bec0aa93f8c6) by Will Scott and Oasis Labs
15
16mod error;
17#[cfg(test)]
18mod test;
19
20use error::Error;
21use object::{Object, ObjectSection};
22use std::{
23    borrow::Cow,
24    collections::BTreeMap,
25    fs,
26    io::{self, Seek, Write},
27    ops::Deref,
28    path::{Path, PathBuf},
29};
30
/// Section index used by `WASM::load` to look up the code section of the WASM
/// binary, whose file offset is added to every DWARF row address.
const DWARF_CODE_SECTION_ID: usize = 10;
32
/// Represents a code unit which can be translated to a sourcemap code point
///
/// One value is created per DWARF line program row while loading the WASM.
#[derive(Debug)]
pub struct CodePoint {
    // Source file path assembled from the DWARF directory and file name
    path: PathBuf,
    // Row address plus the file offset of the code section
    address: i64,
    // Source line; 0 is stored when the row carries no line information
    line: i64,
    // Source column; rows marked as "left edge" are stored as column 1
    column: i64,
}
41
/// The actual DWARF to Sourcemap mapper
///
/// # Usage
///
/// ```rust
/// use wasm2map::WASM;
///
/// let mapper = WASM::load("/path/to/the/file.wasm");
/// if let Ok(mut mapper) = mapper {
///     let sourcemap = mapper.map_v3();
///     mapper.patch("http://localhost:8080").expect("Failed to patch");
/// }
/// ```
#[derive(Debug)]
pub struct WASM {
    // Location of the WASM binary; reopened for writing when patching
    path: PathBuf,
    // Code points keyed (and therefore ordered) by their address
    points: BTreeMap<i64, CodePoint>,
    // Total byte size of the sourceMappingURL custom section currently at the
    // end of the file, or None when the binary has no such section
    sourcemap_size: Option<u64>,
}
61
62impl WASM {
63    /// Loads the WASM file under 'path' into memory and parses the DWARF info
64    /// If the WASM or the DWARF info in it is malformed (or non-existent)
65    /// it returns with the appropriate error result.
66    pub fn load(path: impl AsRef<Path>) -> Result<Self, Error> {
67        let path = path.as_ref().to_owned();
68
69        #[cfg(feature = "memmap2")]
70        let raw = {
71            // Load the WASM file into memory via mmap to speed things up
72            // with large WASM files
73            let file = fs::File::open(&path)?;
74            unsafe { memmap2::Mmap::map(&file) }?
75        };
76        #[cfg(not(feature = "memmap2"))]
77        let raw = {
78            // Load the WASM file via the standard library, which can be slower
79            // for larger WASM files, but some platforms might not be supported
80            // by memmap2
81            fs::read(&path)?
82        };
83
84        // Parse the modules and sections from the WASM
85        let object = object::File::parse(raw.deref())?;
86
87        // Load the sourcemap custom section (if any) and calculate the total
88        // size of the whole custom module (that is, the sourceMappingURL module)
89        let sourcemap_size = match object.section_by_name("sourceMappingURL") {
90            Some(section) => {
91                // This is the '0' section type
92                const CUSTOM_SEGMENT_ID_SIZE: u64 = 1;
93                // The size of the length b"sourceMappingURL" (which is always
94                // 1 byte, so the size of u8) + the length of the
95                // b"sourceMappingURL" byte array
96                const SEGMENT_NAME_SIZE: u64 =
97                    std::mem::size_of::<u8>() as u64 + b"sourceMappingURL".len() as u64;
98                let section_size_length = WASM::encode_uint_var(section.size() as u32).len() as u64;
99                let section_size = CUSTOM_SEGMENT_ID_SIZE
100                    + SEGMENT_NAME_SIZE
101                    + section_size_length
102                    + section.size();
103                Some(section_size)
104            }
105            None => None,
106        };
107
108        // Load the code section to get its offset
109        let offset: i64 = {
110            let (code_section_offset, _) = object
111                .section_by_index(object::SectionIndex(DWARF_CODE_SECTION_ID))?
112                .file_range()
113                .ok_or("Missing code section in WASM")?;
114            code_section_offset.try_into()?
115        };
116
117        // Load all of the DWARF sections
118        let section =
119            gimli::Dwarf::load(|id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
120                match object.section_by_name(id.name()) {
121                    Some(ref section) => Ok(section
122                        .uncompressed_data()
123                        .unwrap_or(Cow::Borrowed(&[][..]))),
124                    None => Ok(Cow::Borrowed(&[][..])),
125                }
126            })?;
127
128        // Borrow a `Cow<[u8]>` to create an `EndianSlice`.
129        let borrow_section: &dyn for<'a> Fn(
130            &'a Cow<[u8]>,
131        )
132            -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
133            &|section| gimli::EndianSlice::new(section, gimli::RunTimeEndian::Little);
134
135        // Create `EndianSlice`s for all of the sections.
136        let dwarf = section.borrow(&borrow_section);
137
138        // Collect the debug data and enforce that they are sorted by address
139        // which BTreeMap guarantees
140        let mut points: BTreeMap<i64, CodePoint> = BTreeMap::new();
141
142        let mut iter = dwarf.units();
143        while let Some(header) = iter.next()? {
144            let unit = dwarf.unit(header)?;
145
146            // Get the line program for the compilation unit.
147            if let Some(program) = unit.line_program.clone() {
148                // Iterate over the line program rows for the unit.
149                let mut rows = program.rows();
150                while let Some((header, row)) = rows.next_row()? {
151                    // We will collect the embdedded path from the DWARF loc metadata
152                    let mut path = PathBuf::new();
153
154                    if let Some(file) = row.file(header) {
155                        // The directory index 0 is defined to correspond to the compilation unit directory.
156                        if file.directory_index() != 0 {
157                            if let Some(dir) = file.directory(header) {
158                                path.push(
159                                    dwarf.attr_string(&unit, dir)?.to_string_lossy().as_ref(),
160                                );
161                            }
162                        }
163
164                        path.push(
165                            dwarf
166                                .attr_string(&unit, file.path_name())?
167                                .to_string_lossy()
168                                .as_ref(),
169                        );
170                    }
171
172                    // The address of the instruction in the code section
173                    let address: i64 = {
174                        let mut addr: i64 = row.address().try_into()?;
175                        if row.end_sequence() {
176                            addr -= 1;
177                        }
178                        addr + offset
179                    };
180
181                    // Determine line/column. DWARF line/column is never 0
182                    let line = {
183                        let line = match row.line() {
184                            Some(line) => line.get(),
185
186                            // No line information means this code block does not belong to
187                            // a source code block (generated by the compiler for whatever
188                            // reason)
189                            None => 0,
190                        };
191                        line.try_into()?
192                    };
193
194                    let column: i64 = {
195                        let col = match row.column() {
196                            gimli::ColumnType::LeftEdge => 1,
197                            gimli::ColumnType::Column(column) => column.get(),
198                        };
199                        col.try_into()?
200                    };
201
202                    let point = CodePoint {
203                        path,
204                        address,
205                        line,
206                        column,
207                    };
208
209                    points.insert(point.address, point);
210                }
211            }
212        }
213
214        Ok(Self {
215            path,
216            points,
217            sourcemap_size,
218        })
219    }
220
221    /// Generate the sourcemap v3 JSON from the parsed WASM DWARF data
222    ///
223    /// # Example output
224    ///
225    /// ```json
226    /// {
227    ///     "version": 3,
228    ///     "names": [],
229    ///     "sources": [
230    ///         "file/path/name.rs",
231    ///         "another/file/path.rs"
232    ///         ...
233    ///     ],
234    ///     "sourcesContent": null,
235    ///     "mappings": {
236    ///         "yjBAiIA,qCAIiB,QAMhB,...,oBAAA"
237    ///     }
238    /// }
239    /// ```
240    pub fn map_v3(&self) -> String {
241        let mut sourcemap = String::with_capacity(self.points.len() * 4 + 100);
242        let (mappings, sources) = self.generate();
243
244        sourcemap.push('{');
245        sourcemap.push_str(r#""version":3,"#);
246        sourcemap.push_str(r#""names":[],"#);
247        sourcemap.push_str(format!(r#""sources":["{}"],"#, sources.join(r#"",""#)).as_str());
248        sourcemap.push_str(r#""sourcesContent":null,"#);
249        sourcemap.push_str(format!(r#""mappings":"{}""#, mappings.join(",")).as_str());
250        sourcemap.push('}');
251
252        sourcemap
253    }
254
255    #[allow(rustdoc::invalid_html_tags)]
256    /// Patch the loaded WASM file to reference the sourcemap and ask the
257    /// browser or debugger to load it for us when referencing the code
258    ///
259    /// # Limitations
260    /// This can only work if the sourceMappingURL custom section is the last
261    /// section of the WASM.
262    ///
263    /// # How does this work?
264    ///
265    /// The WebAssembly specification contains a "custom" section definition
266    /// which is used to encode the sourcemap url in the WASM binary.
267    ///
268    /// The structure of the custom module is as follows (without ):
269    /// (
270    ///     0 <section_length> (
271    ///         <name_length> <name>
272    ///         <urllen> <url>
273    ///     )
274    /// )
275    ///
276    /// This structure is VLQ encoded without the parentheses and spaces into
277    /// a byte array and appended to the end of the WASM binary.
278    ///
279    /// More details in the [WebAssembly Module Specification](https://webassembly.github.io/spec/core/binary/modules.html)
280    pub fn patch(&mut self, url: &str) -> Result<(), Error> {
281        // Open WASM binary for writing
282        let mut wasm = fs::OpenOptions::new()
283            .write(true)
284            .open(&self.path)
285            .map_err(|err| {
286                format!(
287                    "Failed to open WASM file to append sourcemap section: {}",
288                    err
289                )
290            })?;
291
292        // Grab the actual size (byte count) of the WASM binary
293        let size = wasm.seek(io::SeekFrom::End(0))?;
294
295        // Determine the file cusrsor position without the custom section (if any)
296        // by subtracting the size of the sourceMappingURL section from the
297        // byte size of the WASM binary
298        let pos = self
299            .sourcemap_size
300            .map(|length| size - length)
301            .unwrap_or(size);
302
303        // Truncate the WASM binary and position the file cursor to the new end
304        // (if there was a sourcemap added), no-op otherwise
305        wasm.set_len(pos)?;
306        wasm.seek(io::SeekFrom::End(0))?;
307
308        // Generate the souceMappingURL custom
309        // section (see above for info on structure)
310        const WASM_CUSTOM_SECTION_ID: u32 = 0;
311        let section_name = "sourceMappingURL";
312        let section_content = [
313            &WASM::encode_uint_var(section_name.len() as u32)[..],
314            section_name.as_bytes(),
315            &WASM::encode_uint_var(url.len() as u32)[..],
316            url.as_bytes(),
317        ]
318        .concat();
319        let section = [
320            &WASM::encode_uint_var(WASM_CUSTOM_SECTION_ID)[..],
321            &WASM::encode_uint_var(section_content.len() as u32)[..],
322            section_content.as_ref(),
323        ]
324        .concat();
325
326        // Write out the custom section
327        wasm.write_all(&section)
328            .map_err(|err| format!("Failed to write sourcemap section to WASM file: {}", err))?;
329
330        let _s = wasm.seek(io::SeekFrom::End(0));
331
332        // Set the sourcemap data after writing it out
333        self.sourcemap_size = Some(section.len() as u64);
334
335        Ok(())
336    }
337
338    // Generate the sourcemap mappings and source ids.
339    //
340    // The sourcemap 3 format tries to save on file size by using offsets
341    // wherever possible. So we need to encode the source file data and
342    // line, column data for each WASM code segment address in the expected
343    // order, so offsets make sense when resolved by the browser (or debugger)
344    fn generate<'a>(&'a self) -> (Vec<String>, Vec<String>) {
345        // We collect all referenced source code files in a table and use the
346        // source id (which is the value param of this HashMap) as the basis for
347        // the offset when encoding position (i.e. last source id - this source id),
348        // which require preserving the order of inserts!
349        let mut sources: Vec<&'a Path> = Vec::new();
350        //let mut sources: BTreeMap<&'a Path, i64> = BTreeMap::new();
351        //let mut sources: HashMap<&'a Path, i64> = HashMap::new();
352
353        // This is the WASM address -> file:line:col mapping table in the
354        // required format, which is basically offsets written after each other
355        // in the specified order (address, source id, line, finally col)
356        let mut mappings: Vec<String> = Vec::new();
357
358        // These variables track the last of the four pieces of data so we can
359        // subtract from them to get an offset and then update them to the latest
360        let mut last_address: i64 = 0;
361        let mut last_source_id: i64 = 0;
362        let mut last_line: i64 = 1;
363        let mut last_column: i64 = 1;
364
365        for line in self.points.values() {
366            // Line 0 means that this is an intermediate code block and does not
367            // refer to a code block in the source files. We need to skip these
368            // in order to generate the proper offset encoding
369            if line.line == 0 {
370                continue;
371            }
372
373            // We either get the id of a source file if already in the table
374            // or we get the max(id) + 1 as the new id for a previously unseen
375            // source file, which we promptly insert into the source table
376
377            let source_id: i64 =
378                if let Some(id) = sources.iter().position(|&val| val == line.path.as_path()) {
379                    id as i64
380                } else {
381                    let id = sources.len() as i64;
382                    sources.push(&line.path);
383                    id
384                };
385
386            // Calculate the offsets (see above)
387            let address_delta = line.address - last_address;
388            let source_id_delta = source_id - last_source_id;
389            let line_delta = line.line - last_line;
390            let column_delta = line.column - last_column;
391
392            // Store the mapping offsets in the specific format
393            // (see above) in the mapping table
394            let mapping = format!(
395                "{}{}{}{}",
396                WASM::vlq_encode(address_delta).as_str(),
397                WASM::vlq_encode(source_id_delta).as_str(),
398                WASM::vlq_encode(line_delta).as_str(),
399                WASM::vlq_encode(column_delta).as_str()
400            );
401            mappings.push(mapping);
402
403            // Update the tracking variables to the freshly calculated values
404            // to use them in the next iteration (see above)
405            last_address = line.address;
406            last_source_id = source_id;
407            last_line = line.line;
408            last_column = line.column;
409        }
410
411        // We only need the file paths from the sources table in the order
412        // they were encoded, turned to strings
413        let source_paths = sources
414            .iter()
415            .filter_map(|p| Some(p.as_os_str().to_str()?.to_owned()))
416            .collect::<Vec<_>>();
417
418        (mappings, source_paths)
419    }
420
421    // Simple implementation of VLQ (variable-length quality) encoding to avoid
422    // yet another dependency to accomplish this simple task
423    //
424    // TODO(mtolmacs): Use smallvec instead of string
425    fn vlq_encode(value: i64) -> String {
426        const VLQ_CHARS: &[u8] =
427            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".as_bytes();
428        let mut x = if value >= 0 {
429            value << 1
430        } else {
431            (-value << 1) + 1
432        };
433        let mut result = String::new();
434
435        while x > 31 {
436            let idx: usize = (32 + (x & 31)).try_into().unwrap();
437            let ch: char = VLQ_CHARS[idx].into();
438            result.push(ch);
439            x >>= 5;
440        }
441        let idx: usize = x.try_into().unwrap();
442        let ch: char = VLQ_CHARS[idx].into();
443        result.push(ch);
444
445        result
446    }
447
448    fn encode_uint_var(mut n: u32) -> Vec<u8> {
449        let mut result = Vec::new();
450        while n > 127 {
451            result.push((128 | (n & 127)) as u8);
452            n >>= 7;
453        }
454        result.push(n as u8);
455        result
456    }
457}