wasm2map 0.1.0

Generates a source map for a .wasm file from the DWARF debug info embedded in it.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
#![warn(missing_docs)]
//! Having a sourcemap associated with your WASM file allows seeing the exact
//! filename, the line number and character position right in the browser or
//! supporting debugger. This can speed up tracing errors back to their source,
//! make sense of panic unwinds right in the browser and even simple console
//! messages are immediately identifiable without external post processing.
//!
//! It also offers an opportunity to debug the WASM binary, set breakpoints and
//! overall enjoy the same developer experience JavaScript has had in modern
//! browsers for ages.
//!
//! Inspirations:
//! * [wasm_sourcemap.py](https://github.com/emscripten-core/emscripten/blob/main/tools/wasm-sourcemap.py) by the Emscripten Team
//! * [WebAssembly Debugging](https://medium.com/oasislabs/webassembly-debugging-bec0aa93f8c6) by Will Scott and Oasis Labs

mod error;
#[cfg(test)]
mod test;

use error::Error;
use object::{Object, ObjectSection};
use std::{
    borrow::Cow,
    collections::BTreeMap,
    fs,
    io::{self, Seek, Write},
    ops::Deref,
    path::{Path, PathBuf},
};

// Id of the code section in the WASM binary format (despite the name this is
// not a DWARF identifier). `WASM::load` wraps it in `object::SectionIndex` to
// look up the code section and obtain its offset within the file.
const DWARF_CODE_SECTION_ID: usize = 10;

/// Represents a code unit which can be translated to a sourcemap code point
///
/// One value is created per DWARF line program row while loading a WASM file.
#[derive(Debug)]
pub struct CodePoint {
    /// Source file path assembled from the row's DWARF directory (when the
    /// directory index is not 0) and file name entries
    path: PathBuf,
    /// Row address shifted by the file offset of the WASM code section
    address: i64,
    /// Source line taken from DWARF; 0 when the row carries no line information
    line: i64,
    /// Source column taken from DWARF; 1 when the row refers to the left edge
    column: i64,
}

/// The actual DWARF to Sourcemap mapper
///
/// A value is created by loading a WASM file from disk; it keeps the code
/// points parsed from the DWARF line programs and can then render them as a
/// sourcemap or patch the file to reference a sourcemap URL.
///
/// # Usage
///
/// ```rust
/// use wasm2map::WASM;
///
/// let mapper = WASM::load("/path/to/the/file.wasm");
/// if let Ok(mut mapper) = mapper {
///     let sourcemap = mapper.map_v3();
///     mapper.patch("http://localhost:8080").expect("Failed to patch");
/// }
/// ```
#[derive(Debug)]
pub struct WASM {
    /// Path of the WASM file this mapper was loaded from (reopened when patching)
    path: PathBuf,
    /// Code points keyed (and therefore ordered) by their address
    points: BTreeMap<i64, CodePoint>,
    /// Total byte size of the `sourceMappingURL` custom section currently in
    /// the file, or `None` when the file has no such section
    sourcemap_size: Option<u64>,
}

impl WASM {
    /// Loads the WASM file under `path` into memory and parses its DWARF info.
    ///
    /// Every row of every DWARF line program becomes one [`CodePoint`], keyed
    /// by its address. The size of an already present `sourceMappingURL`
    /// custom section (if any) is recorded as well, so that a later patch can
    /// replace that section instead of appending a second one.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or parsed, if the code
    /// section is missing, or if reading the DWARF data fails.
    pub fn load(path: impl AsRef<Path>) -> Result<Self, Error> {
        let path = path.as_ref().to_owned();

        #[cfg(feature = "memmap2")]
        let raw = {
            // Load the WASM file into memory via mmap to speed things up
            // with large WASM files
            let file = fs::File::open(&path)?;
            // SAFETY: the mapping is only read and is dropped before this
            // function returns. Soundness relies on no other process modifying
            // or truncating the file while it is mapped.
            unsafe { memmap2::Mmap::map(&file) }?
        };
        #[cfg(not(feature = "memmap2"))]
        let raw = {
            // Load the WASM file via the standard library, which can be slower
            // for larger WASM files, but some platforms might not be supported
            // by memmap2
            fs::read(&path)?
        };

        // Parse the modules and sections from the WASM
        let object = object::File::parse(raw.deref())?;

        // Load the sourcemap custom section (if any) and calculate the total
        // size of the whole custom section (that is, the sourceMappingURL one)
        let sourcemap_size = match object.section_by_name("sourceMappingURL") {
            Some(section) => {
                // This is the '0' section type
                const CUSTOM_SEGMENT_ID_SIZE: u64 = 1;
                // The size of the length of b"sourceMappingURL" (which is always
                // 1 byte, so the size of u8) + the length of the
                // b"sourceMappingURL" byte array
                const SEGMENT_NAME_SIZE: u64 =
                    std::mem::size_of::<u8>() as u64 + b"sourceMappingURL".len() as u64;
                // The section's length field covers the name (including its
                // length prefix) as well as the data, so the width of that
                // field must be derived from the combined payload size. Using
                // the data size alone is off by one byte whenever adding the
                // name crosses a variable-length integer width boundary.
                let payload_size = SEGMENT_NAME_SIZE + section.size();
                let section_size_length =
                    WASM::encode_uint_var(u32::try_from(payload_size)?).len() as u64;
                Some(CUSTOM_SEGMENT_ID_SIZE + section_size_length + payload_size)
            }
            None => None,
        };

        // Load the code section to get its offset
        let offset: i64 = {
            let (code_section_offset, _) = object
                .section_by_index(object::SectionIndex(DWARF_CODE_SECTION_ID))?
                .file_range()
                .ok_or("Missing code section in WASM")?;
            code_section_offset.try_into()?
        };

        // Load all of the DWARF sections; a missing or unreadable section is
        // treated as an empty one
        let section =
            gimli::Dwarf::load(|id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
                match object.section_by_name(id.name()) {
                    Some(ref section) => Ok(section
                        .uncompressed_data()
                        .unwrap_or(Cow::Borrowed(&[][..]))),
                    None => Ok(Cow::Borrowed(&[][..])),
                }
            })?;

        // Borrow a `Cow<[u8]>` to create an `EndianSlice`.
        let borrow_section: &dyn for<'a> Fn(
            &'a Cow<[u8]>,
        )
            -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
            &|section| gimli::EndianSlice::new(section, gimli::RunTimeEndian::Little);

        // Create `EndianSlice`s for all of the sections.
        let dwarf = section.borrow(&borrow_section);

        // Collect the debug data and enforce that they are sorted by address
        // which BTreeMap guarantees
        let mut points: BTreeMap<i64, CodePoint> = BTreeMap::new();

        let mut iter = dwarf.units();
        while let Some(header) = iter.next()? {
            let unit = dwarf.unit(header)?;

            // Get the line program for the compilation unit.
            if let Some(program) = unit.line_program.clone() {
                // Iterate over the line program rows for the unit.
                let mut rows = program.rows();
                while let Some((header, row)) = rows.next_row()? {
                    // We will collect the embedded path from the DWARF loc metadata
                    let mut path = PathBuf::new();

                    if let Some(file) = row.file(header) {
                        // The directory index 0 is defined to correspond to the compilation unit directory.
                        if file.directory_index() != 0 {
                            if let Some(dir) = file.directory(header) {
                                path.push(
                                    dwarf.attr_string(&unit, dir)?.to_string_lossy().as_ref(),
                                );
                            }
                        }

                        path.push(
                            dwarf
                                .attr_string(&unit, file.path_name())?
                                .to_string_lossy()
                                .as_ref(),
                        );
                    }

                    // The address of the instruction in the code section
                    let address: i64 = {
                        let mut addr: i64 = row.address().try_into()?;
                        // An end-of-sequence row is moved back by one so that
                        // it is keyed one address lower than the row reports
                        if row.end_sequence() {
                            addr -= 1;
                        }
                        addr + offset
                    };

                    // Determine line/column. DWARF line/column is never 0
                    let line = {
                        let line = match row.line() {
                            Some(line) => line.get(),

                            // No line information means this code block does not belong to
                            // a source code block (generated by the compiler for whatever
                            // reason)
                            None => 0,
                        };
                        line.try_into()?
                    };

                    let column: i64 = {
                        let col = match row.column() {
                            gimli::ColumnType::LeftEdge => 1,
                            gimli::ColumnType::Column(column) => column.get(),
                        };
                        col.try_into()?
                    };

                    let point = CodePoint {
                        path,
                        address,
                        line,
                        column,
                    };

                    // A later row with the same address replaces the earlier one
                    points.insert(point.address, point);
                }
            }
        }

        Ok(Self {
            path,
            points,
            sourcemap_size,
        })
    }

    /// Generate the sourcemap v3 JSON from the parsed WASM DWARF data
    ///
    /// # Example output
    ///
    /// ```json
    /// {
    ///     "version": 3,
    ///     "names": [],
    ///     "sources": [
    ///         "file/path/name.rs",
    ///         "another/file/path.rs"
    ///         ...
    ///     ],
    ///     "sourcesContent": null,
    ///     "mappings": {
    ///         "yjBAiIA,qCAIiB,QAMhB,...,oBAAA"
    ///     }
    /// }
    /// ```
    pub fn map_v3(&self) -> String {
        let mut sourcemap = String::with_capacity(self.points.len() * 4 + 100);
        let (mappings, sources) = self.generate();

        sourcemap.push('{');
        sourcemap.push_str(r#""version":3,"#);
        sourcemap.push_str(r#""names":[],"#);
        sourcemap.push_str(format!(r#""sources":["{}"],"#, sources.join(r#"",""#)).as_str());
        sourcemap.push_str(r#""sourcesContent":null,"#);
        sourcemap.push_str(format!(r#""mappings":"{}""#, mappings.join(",")).as_str());
        sourcemap.push('}');

        sourcemap
    }

    /// Patch the loaded WASM file to reference the sourcemap and ask the
    /// browser or debugger to load it for us when referencing the code
    ///
    /// An already existing `sourceMappingURL` section is cut off the end of
    /// the file before the new one is appended.
    ///
    /// # Limitations
    /// This can only work if the sourceMappingURL custom section is the last
    /// section of the WASM.
    ///
    /// # How does this work?
    ///
    /// The WebAssembly specification contains a "custom" section definition
    /// which is used to encode the sourcemap url in the WASM binary.
    ///
    /// The structure of the custom section is as follows:
    ///
    /// ```text
    /// (
    ///     0 <section_length> (
    ///         <name_length> <name>
    ///         <urllen> <url>
    ///     )
    /// )
    /// ```
    ///
    /// This structure is written without the parentheses and spaces, with all
    /// lengths as variable-length (LEB128) unsigned integers, into a byte
    /// array and appended to the end of the WASM binary.
    ///
    /// More details in the [WebAssembly Module Specification](https://webassembly.github.io/spec/core/binary/modules.html)
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, resized or written, if
    /// the file is smaller than the sourcemap section recorded for it, or if
    /// the url does not fit into a 32 bit section length.
    pub fn patch(&mut self, url: &str) -> Result<(), Error> {
        // Open WASM binary for writing
        let mut wasm = fs::OpenOptions::new()
            .write(true)
            .open(&self.path)
            .map_err(|err| {
                format!(
                    "Failed to open WASM file to append sourcemap section: {}",
                    err
                )
            })?;

        // Grab the actual size (byte count) of the WASM binary
        let size = wasm.seek(io::SeekFrom::End(0))?;

        // Determine the file cursor position without the custom section (if any)
        // by subtracting the size of the sourceMappingURL section from the
        // byte size of the WASM binary. The file may have changed since it was
        // loaded, so guard against the subtraction underflowing.
        let pos = match self.sourcemap_size {
            Some(length) => size.checked_sub(length).ok_or_else(|| {
                format!(
                    "WASM file ({} bytes) is smaller than its recorded sourcemap section ({} bytes)",
                    size, length
                )
            })?,
            None => size,
        };

        // Truncate the WASM binary and position the file cursor to the new end
        // (if there was a sourcemap added), no-op otherwise
        wasm.set_len(pos)?;
        // The old section is gone from the file now; forget it right away so a
        // failure below cannot leave a stale size behind for the next call
        self.sourcemap_size = None;
        wasm.seek(io::SeekFrom::End(0))?;

        // Generate the sourceMappingURL custom
        // section (see above for info on structure)
        const WASM_CUSTOM_SECTION_ID: u32 = 0;
        let section_name = "sourceMappingURL";
        let section_content = [
            &WASM::encode_uint_var(section_name.len() as u32)[..],
            section_name.as_bytes(),
            // Lengths are 32 bit in the binary format: fail instead of
            // silently truncating an oversized url
            &WASM::encode_uint_var(u32::try_from(url.len())?)[..],
            url.as_bytes(),
        ]
        .concat();
        let section = [
            &WASM::encode_uint_var(WASM_CUSTOM_SECTION_ID)[..],
            &WASM::encode_uint_var(u32::try_from(section_content.len())?)[..],
            section_content.as_ref(),
        ]
        .concat();

        // Write out the custom section
        wasm.write_all(&section)
            .map_err(|err| format!("Failed to write sourcemap section to WASM file: {}", err))?;

        // Set the sourcemap data after writing it out
        self.sourcemap_size = Some(section.len() as u64);

        Ok(())
    }

    // Generate the sourcemap mappings and source ids.
    //
    // The sourcemap 3 format tries to save on file size by using offsets
    // wherever possible. So we need to encode the source file data and
    // line, column data for each WASM code segment address in the expected
    // order, so offsets make sense when resolved by the browser (or debugger)
    fn generate<'a>(&'a self) -> (Vec<String>, Vec<String>) {
        // We collect all referenced source code files in a table and use the
        // source id (which is the value param of this HashMap) as the basis for
        // the offset when encoding position (i.e. last source id - this source id),
        // which require preserving the order of inserts!
        let mut sources: Vec<&'a Path> = Vec::new();
        //let mut sources: BTreeMap<&'a Path, i64> = BTreeMap::new();
        //let mut sources: HashMap<&'a Path, i64> = HashMap::new();

        // This is the WASM address -> file:line:col mapping table in the
        // required format, which is basically offsets written after each other
        // in the specified order (address, source id, line, finally col)
        let mut mappings: Vec<String> = Vec::new();

        // These variables track the last of the four pieces of data so we can
        // subtract from them to get an offset and then update them to the latest
        let mut last_address: i64 = 0;
        let mut last_source_id: i64 = 0;
        let mut last_line: i64 = 1;
        let mut last_column: i64 = 1;

        for line in self.points.values() {
            // Line 0 means that this is an intermediate code block and does not
            // refer to a code block in the source files. We need to skip these
            // in order to generate the proper offset encoding
            if line.line == 0 {
                continue;
            }

            // We either get the id of a source file if already in the table
            // or we get the max(id) + 1 as the new id for a previously unseen
            // source file, which we promptly insert into the source table

            let source_id: i64 =
                if let Some(id) = sources.iter().position(|&val| val == line.path.as_path()) {
                    id as i64
                } else {
                    let id = sources.len() as i64;
                    sources.push(&line.path);
                    id
                };

            // Calculate the offsets (see above)
            let address_delta = line.address - last_address;
            let source_id_delta = source_id - last_source_id;
            let line_delta = line.line - last_line;
            let column_delta = line.column - last_column;

            // Store the mapping offsets in the specific format
            // (see above) in the mapping table
            let mapping = format!(
                "{}{}{}{}",
                WASM::vlq_encode(address_delta).as_str(),
                WASM::vlq_encode(source_id_delta).as_str(),
                WASM::vlq_encode(line_delta).as_str(),
                WASM::vlq_encode(column_delta).as_str()
            );
            mappings.push(mapping);

            // Update the tracking variables to the freshly calculated values
            // to use them in the next iteration (see above)
            last_address = line.address;
            last_source_id = source_id;
            last_line = line.line;
            last_column = line.column;
        }

        // We only need the file paths from the sources table in the order
        // they were encoded, turned to strings
        let source_paths = sources
            .iter()
            .filter_map(|p| Some(p.as_os_str().to_str()?.to_owned()))
            .collect::<Vec<_>>();

        (mappings, source_paths)
    }

    // Simple implementation of the base64 VLQ (variable-length quantity)
    // encoding used by sourcemaps, to avoid yet another dependency to
    // accomplish this simple task.
    //
    // The sign is stored in the lowest bit, the magnitude in the bits above
    // it; the result is emitted in 5 bit groups, least significant group
    // first, where every group except the last has the continuation bit set.
    //
    // The arithmetic is done in `u128`, so every `i64` (including `i64::MIN`
    // and magnitudes whose doubled value does not fit into an `i64`) encodes
    // without overflow.
    //
    // TODO(mtolmacs): Use smallvec instead of string
    fn vlq_encode(value: i64) -> String {
        const VLQ_CHARS: &[u8; 64] =
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        const DIGIT_MASK: u128 = 0b01_1111;
        const CONTINUATION_BIT: u128 = 0b10_0000;

        let sign_bit = u128::from(value < 0);
        let mut rest = (u128::from(value.unsigned_abs()) << 1) | sign_bit;
        let mut result = String::new();

        while rest > DIGIT_MASK {
            // At most 6 bits are set here, so the index is always below 64
            let idx = ((rest & DIGIT_MASK) | CONTINUATION_BIT) as usize;
            result.push(char::from(VLQ_CHARS[idx]));
            rest >>= 5;
        }
        // The loop exits with `rest <= 31`, so the cast cannot truncate
        result.push(char::from(VLQ_CHARS[rest as usize]));

        result
    }

    // Encode an unsigned integer as a variable-length byte sequence: 7 bits
    // of payload per byte, least significant group first, with the high bit
    // set on every byte except the last one
    fn encode_uint_var(mut n: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        loop {
            let low_bits = (n & 0x7f) as u8;
            n >>= 7;
            if n == 0 {
                // Nothing left above this group: emit it without the marker
                bytes.push(low_bits);
                return bytes;
            }
            bytes.push(low_bits | 0x80);
        }
    }
}