wasm2map/lib.rs
1#![warn(missing_docs)]
//! Having a sourcemap associated with your WASM file allows seeing the exact
//! filename, the line number and character position right in the browser or
//! a supporting debugger. This speeds up tracing errors back to their source,
//! helps make sense of panic unwinds right in the browser, and makes even
//! simple console messages immediately identifiable without external
//! post-processing.
7//!
//! It also makes it possible to debug the WASM binary, set breakpoints and
//! overall get the same developer experience JavaScript has had in modern
//! browsers for ages.
11//!
12//! Inspirations:
13//! * [wasm_sourcemap.py](https://github.com/emscripten-core/emscripten/blob/main/tools/wasm-sourcemap.py) by the Emscripten Team
14//! * [WebAssembly Debugging](https://medium.com/oasislabs/webassembly-debugging-bec0aa93f8c6) by Will Scott and Oasis Labs
15
16mod error;
17#[cfg(test)]
18mod test;
19
20use error::Error;
21use object::{Object, ObjectSection};
22use std::{
23 borrow::Cow,
24 collections::BTreeMap,
25 fs,
26 io::{self, Seek, Write},
27 ops::Deref,
28 path::{Path, PathBuf},
29};
30
/// Index used to look up the WASM code section, whose file offset is added
/// to every DWARF address when the binary is loaded.
const DWARF_CODE_SECTION_ID: usize = 10;
32
/// A single DWARF line-program row: one WASM code address together with the
/// source location it can be translated to in a sourcemap
#[derive(Debug)]
pub struct CodePoint {
    /// Source file path assembled from the DWARF directory and file name
    path: PathBuf,
    /// Instruction address, already shifted by the code section offset
    address: i64,
    /// Source line; `0` marks a row that carries no line information
    line: i64,
    /// Source column; the left edge of a line is stored as `1`
    column: i64,
}
41
42/// The actual DWARF to Sourcemap mapper
43///
44/// # Usage
45///
46/// ```rust
47/// use wasm2map::WASM;
48///
49/// let mapper = WASM::load("/path/to/the/file.wasm");
50/// if let Ok(mut mapper) = mapper {
51/// let sourcemap = mapper.map_v3();
52/// mapper.patch("http://localhost:8080").expect("Failed to patch");
53/// }
54/// ```
55#[derive(Debug)]
56pub struct WASM {
57 path: PathBuf,
58 points: BTreeMap<i64, CodePoint>,
59 sourcemap_size: Option<u64>,
60}
61
62impl WASM {
63 /// Loads the WASM file under 'path' into memory and parses the DWARF info
64 /// If the WASM or the DWARF info in it is malformed (or non-existent)
65 /// it returns with the appropriate error result.
66 pub fn load(path: impl AsRef<Path>) -> Result<Self, Error> {
67 let path = path.as_ref().to_owned();
68
69 #[cfg(feature = "memmap2")]
70 let raw = {
71 // Load the WASM file into memory via mmap to speed things up
72 // with large WASM files
73 let file = fs::File::open(&path)?;
74 unsafe { memmap2::Mmap::map(&file) }?
75 };
76 #[cfg(not(feature = "memmap2"))]
77 let raw = {
78 // Load the WASM file via the standard library, which can be slower
79 // for larger WASM files, but some platforms might not be supported
80 // by memmap2
81 fs::read(&path)?
82 };
83
84 // Parse the modules and sections from the WASM
85 let object = object::File::parse(raw.deref())?;
86
87 // Load the sourcemap custom section (if any) and calculate the total
88 // size of the whole custom module (that is, the sourceMappingURL module)
89 let sourcemap_size = match object.section_by_name("sourceMappingURL") {
90 Some(section) => {
91 // This is the '0' section type
92 const CUSTOM_SEGMENT_ID_SIZE: u64 = 1;
93 // The size of the length b"sourceMappingURL" (which is always
94 // 1 byte, so the size of u8) + the length of the
95 // b"sourceMappingURL" byte array
96 const SEGMENT_NAME_SIZE: u64 =
97 std::mem::size_of::<u8>() as u64 + b"sourceMappingURL".len() as u64;
98 let section_size_length = WASM::encode_uint_var(section.size() as u32).len() as u64;
99 let section_size = CUSTOM_SEGMENT_ID_SIZE
100 + SEGMENT_NAME_SIZE
101 + section_size_length
102 + section.size();
103 Some(section_size)
104 }
105 None => None,
106 };
107
108 // Load the code section to get its offset
109 let offset: i64 = {
110 let (code_section_offset, _) = object
111 .section_by_index(object::SectionIndex(DWARF_CODE_SECTION_ID))?
112 .file_range()
113 .ok_or("Missing code section in WASM")?;
114 code_section_offset.try_into()?
115 };
116
117 // Load all of the DWARF sections
118 let section =
119 gimli::Dwarf::load(|id: gimli::SectionId| -> Result<Cow<[u8]>, gimli::Error> {
120 match object.section_by_name(id.name()) {
121 Some(ref section) => Ok(section
122 .uncompressed_data()
123 .unwrap_or(Cow::Borrowed(&[][..]))),
124 None => Ok(Cow::Borrowed(&[][..])),
125 }
126 })?;
127
128 // Borrow a `Cow<[u8]>` to create an `EndianSlice`.
129 let borrow_section: &dyn for<'a> Fn(
130 &'a Cow<[u8]>,
131 )
132 -> gimli::EndianSlice<'a, gimli::RunTimeEndian> =
133 &|section| gimli::EndianSlice::new(section, gimli::RunTimeEndian::Little);
134
135 // Create `EndianSlice`s for all of the sections.
136 let dwarf = section.borrow(&borrow_section);
137
138 // Collect the debug data and enforce that they are sorted by address
139 // which BTreeMap guarantees
140 let mut points: BTreeMap<i64, CodePoint> = BTreeMap::new();
141
142 let mut iter = dwarf.units();
143 while let Some(header) = iter.next()? {
144 let unit = dwarf.unit(header)?;
145
146 // Get the line program for the compilation unit.
147 if let Some(program) = unit.line_program.clone() {
148 // Iterate over the line program rows for the unit.
149 let mut rows = program.rows();
150 while let Some((header, row)) = rows.next_row()? {
151 // We will collect the embdedded path from the DWARF loc metadata
152 let mut path = PathBuf::new();
153
154 if let Some(file) = row.file(header) {
155 // The directory index 0 is defined to correspond to the compilation unit directory.
156 if file.directory_index() != 0 {
157 if let Some(dir) = file.directory(header) {
158 path.push(
159 dwarf.attr_string(&unit, dir)?.to_string_lossy().as_ref(),
160 );
161 }
162 }
163
164 path.push(
165 dwarf
166 .attr_string(&unit, file.path_name())?
167 .to_string_lossy()
168 .as_ref(),
169 );
170 }
171
172 // The address of the instruction in the code section
173 let address: i64 = {
174 let mut addr: i64 = row.address().try_into()?;
175 if row.end_sequence() {
176 addr -= 1;
177 }
178 addr + offset
179 };
180
181 // Determine line/column. DWARF line/column is never 0
182 let line = {
183 let line = match row.line() {
184 Some(line) => line.get(),
185
186 // No line information means this code block does not belong to
187 // a source code block (generated by the compiler for whatever
188 // reason)
189 None => 0,
190 };
191 line.try_into()?
192 };
193
194 let column: i64 = {
195 let col = match row.column() {
196 gimli::ColumnType::LeftEdge => 1,
197 gimli::ColumnType::Column(column) => column.get(),
198 };
199 col.try_into()?
200 };
201
202 let point = CodePoint {
203 path,
204 address,
205 line,
206 column,
207 };
208
209 points.insert(point.address, point);
210 }
211 }
212 }
213
214 Ok(Self {
215 path,
216 points,
217 sourcemap_size,
218 })
219 }
220
221 /// Generate the sourcemap v3 JSON from the parsed WASM DWARF data
222 ///
223 /// # Example output
224 ///
225 /// ```json
226 /// {
227 /// "version": 3,
228 /// "names": [],
229 /// "sources": [
230 /// "file/path/name.rs",
231 /// "another/file/path.rs"
232 /// ...
233 /// ],
234 /// "sourcesContent": null,
235 /// "mappings": {
236 /// "yjBAiIA,qCAIiB,QAMhB,...,oBAAA"
237 /// }
238 /// }
239 /// ```
240 pub fn map_v3(&self) -> String {
241 let mut sourcemap = String::with_capacity(self.points.len() * 4 + 100);
242 let (mappings, sources) = self.generate();
243
244 sourcemap.push('{');
245 sourcemap.push_str(r#""version":3,"#);
246 sourcemap.push_str(r#""names":[],"#);
247 sourcemap.push_str(format!(r#""sources":["{}"],"#, sources.join(r#"",""#)).as_str());
248 sourcemap.push_str(r#""sourcesContent":null,"#);
249 sourcemap.push_str(format!(r#""mappings":"{}""#, mappings.join(",")).as_str());
250 sourcemap.push('}');
251
252 sourcemap
253 }
254
255 #[allow(rustdoc::invalid_html_tags)]
256 /// Patch the loaded WASM file to reference the sourcemap and ask the
257 /// browser or debugger to load it for us when referencing the code
258 ///
259 /// # Limitations
260 /// This can only work if the sourceMappingURL custom section is the last
261 /// section of the WASM.
262 ///
263 /// # How does this work?
264 ///
265 /// The WebAssembly specification contains a "custom" section definition
266 /// which is used to encode the sourcemap url in the WASM binary.
267 ///
268 /// The structure of the custom module is as follows (without ):
269 /// (
270 /// 0 <section_length> (
271 /// <name_length> <name>
272 /// <urllen> <url>
273 /// )
274 /// )
275 ///
276 /// This structure is VLQ encoded without the parentheses and spaces into
277 /// a byte array and appended to the end of the WASM binary.
278 ///
279 /// More details in the [WebAssembly Module Specification](https://webassembly.github.io/spec/core/binary/modules.html)
280 pub fn patch(&mut self, url: &str) -> Result<(), Error> {
281 // Open WASM binary for writing
282 let mut wasm = fs::OpenOptions::new()
283 .write(true)
284 .open(&self.path)
285 .map_err(|err| {
286 format!(
287 "Failed to open WASM file to append sourcemap section: {}",
288 err
289 )
290 })?;
291
292 // Grab the actual size (byte count) of the WASM binary
293 let size = wasm.seek(io::SeekFrom::End(0))?;
294
295 // Determine the file cusrsor position without the custom section (if any)
296 // by subtracting the size of the sourceMappingURL section from the
297 // byte size of the WASM binary
298 let pos = self
299 .sourcemap_size
300 .map(|length| size - length)
301 .unwrap_or(size);
302
303 // Truncate the WASM binary and position the file cursor to the new end
304 // (if there was a sourcemap added), no-op otherwise
305 wasm.set_len(pos)?;
306 wasm.seek(io::SeekFrom::End(0))?;
307
308 // Generate the souceMappingURL custom
309 // section (see above for info on structure)
310 const WASM_CUSTOM_SECTION_ID: u32 = 0;
311 let section_name = "sourceMappingURL";
312 let section_content = [
313 &WASM::encode_uint_var(section_name.len() as u32)[..],
314 section_name.as_bytes(),
315 &WASM::encode_uint_var(url.len() as u32)[..],
316 url.as_bytes(),
317 ]
318 .concat();
319 let section = [
320 &WASM::encode_uint_var(WASM_CUSTOM_SECTION_ID)[..],
321 &WASM::encode_uint_var(section_content.len() as u32)[..],
322 section_content.as_ref(),
323 ]
324 .concat();
325
326 // Write out the custom section
327 wasm.write_all(§ion)
328 .map_err(|err| format!("Failed to write sourcemap section to WASM file: {}", err))?;
329
330 let _s = wasm.seek(io::SeekFrom::End(0));
331
332 // Set the sourcemap data after writing it out
333 self.sourcemap_size = Some(section.len() as u64);
334
335 Ok(())
336 }
337
338 // Generate the sourcemap mappings and source ids.
339 //
340 // The sourcemap 3 format tries to save on file size by using offsets
341 // wherever possible. So we need to encode the source file data and
342 // line, column data for each WASM code segment address in the expected
343 // order, so offsets make sense when resolved by the browser (or debugger)
344 fn generate<'a>(&'a self) -> (Vec<String>, Vec<String>) {
345 // We collect all referenced source code files in a table and use the
346 // source id (which is the value param of this HashMap) as the basis for
347 // the offset when encoding position (i.e. last source id - this source id),
348 // which require preserving the order of inserts!
349 let mut sources: Vec<&'a Path> = Vec::new();
350 //let mut sources: BTreeMap<&'a Path, i64> = BTreeMap::new();
351 //let mut sources: HashMap<&'a Path, i64> = HashMap::new();
352
353 // This is the WASM address -> file:line:col mapping table in the
354 // required format, which is basically offsets written after each other
355 // in the specified order (address, source id, line, finally col)
356 let mut mappings: Vec<String> = Vec::new();
357
358 // These variables track the last of the four pieces of data so we can
359 // subtract from them to get an offset and then update them to the latest
360 let mut last_address: i64 = 0;
361 let mut last_source_id: i64 = 0;
362 let mut last_line: i64 = 1;
363 let mut last_column: i64 = 1;
364
365 for line in self.points.values() {
366 // Line 0 means that this is an intermediate code block and does not
367 // refer to a code block in the source files. We need to skip these
368 // in order to generate the proper offset encoding
369 if line.line == 0 {
370 continue;
371 }
372
373 // We either get the id of a source file if already in the table
374 // or we get the max(id) + 1 as the new id for a previously unseen
375 // source file, which we promptly insert into the source table
376
377 let source_id: i64 =
378 if let Some(id) = sources.iter().position(|&val| val == line.path.as_path()) {
379 id as i64
380 } else {
381 let id = sources.len() as i64;
382 sources.push(&line.path);
383 id
384 };
385
386 // Calculate the offsets (see above)
387 let address_delta = line.address - last_address;
388 let source_id_delta = source_id - last_source_id;
389 let line_delta = line.line - last_line;
390 let column_delta = line.column - last_column;
391
392 // Store the mapping offsets in the specific format
393 // (see above) in the mapping table
394 let mapping = format!(
395 "{}{}{}{}",
396 WASM::vlq_encode(address_delta).as_str(),
397 WASM::vlq_encode(source_id_delta).as_str(),
398 WASM::vlq_encode(line_delta).as_str(),
399 WASM::vlq_encode(column_delta).as_str()
400 );
401 mappings.push(mapping);
402
403 // Update the tracking variables to the freshly calculated values
404 // to use them in the next iteration (see above)
405 last_address = line.address;
406 last_source_id = source_id;
407 last_line = line.line;
408 last_column = line.column;
409 }
410
411 // We only need the file paths from the sources table in the order
412 // they were encoded, turned to strings
413 let source_paths = sources
414 .iter()
415 .filter_map(|p| Some(p.as_os_str().to_str()?.to_owned()))
416 .collect::<Vec<_>>();
417
418 (mappings, source_paths)
419 }
420
/// Simple implementation of base64 VLQ (variable-length quantity) encoding
/// to avoid yet another dependency to accomplish this simple task.
///
/// The sign is stored in the lowest bit and the magnitude in the bits above
/// it; the result is emitted five bits at a time, least significant group
/// first, with the sixth bit of a digit marking that more digits follow.
///
/// The magnitude is widened before it is shifted, so every `i64` value
/// (including `i64::MIN`) can be encoded without overflowing.
///
/// TODO(mtolmacs): Use smallvec instead of string
fn vlq_encode(value: i64) -> String {
    const VLQ_CHARS: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    let sign = u128::from(value < 0);
    let mut x = (u128::from(value.unsigned_abs()) << 1) | sign;
    let mut result = String::new();

    while x > 31 {
        // Continuation digit: the low five bits plus the "more follows" bit
        result.push(char::from(VLQ_CHARS[32 + (x & 31) as usize]));
        x >>= 5;
    }
    // Final digit, without the continuation bit
    result.push(char::from(VLQ_CHARS[x as usize]));

    result
}
447
/// Encodes `n` as a variable-length byte sequence: seven bits per byte,
/// least significant group first, with the high bit set on every byte
/// except the last one.
fn encode_uint_var(mut n: u32) -> Vec<u8> {
    let mut bytes = Vec::new();
    loop {
        let low_bits = (n & 0x7f) as u8;
        n >>= 7;
        if n == 0 {
            // Nothing left above these seven bits, so this is the final byte
            bytes.push(low_bits);
            return bytes;
        }
        bytes.push(low_bits | 0x80);
    }
}
457}