symbolic_debuginfo/wasm/parser.rs
1//! Contains utilities for parsing a WASM module to retrieve the information needed by [`super::WasmObject`]
2
3use super::WasmError;
4use crate::base::{ObjectKind, Symbol};
5use wasmparser::{
6 BinaryReader, CompositeInnerType, FuncValidatorAllocations, NameSectionReader, Payload,
7 TypeRef, Validator, WasmFeatures,
8};
9
/// A minimal growable bit set backed by 64-bit words.
///
/// Only the operations needed by the parser are provided: resizing, setting a single bit and
/// reading a single bit.
#[derive(Default)]
struct BitVec {
    /// Backing storage; bit `i` lives in word `i / 64` at bit position `i % 64`.
    data: Vec<u64>,
    /// Number of addressable bits. Bits stored past this index are never observable.
    len: usize,
}

impl BitVec {
    /// Number of bits held by each backing word.
    const WORD_BITS: usize = u64::BITS as usize;

    /// Creates an empty bit vector.
    pub fn new() -> Self {
        Self::default()
    }

    /// Resizes the vector to hold exactly `count` bits.
    ///
    /// When the vector grows, every newly addressable bit is initialised to `value`. When it
    /// shrinks, the bits past `count` simply become unreachable.
    pub fn resize(&mut self, count: usize, value: bool) {
        let old_len = self.len;
        let fill = if value { u64::MAX } else { u64::MIN };

        self.data.resize(count.div_ceil(Self::WORD_BITS), fill);
        self.len = count;

        // `Vec::resize` only initialises words that are newly allocated. If the previous length
        // ended in the middle of a word, the remaining bits of that word may hold leftovers
        // (zeros from the initial fill, or stale values from before an earlier shrink), so they
        // have to be normalised by hand when they become addressable.
        let tail = old_len % Self::WORD_BITS;
        if count > old_len && tail != 0 {
            // `tail` is in `1..64`, so the shift cannot overflow.
            let unused = u64::MAX << tail;
            let word = &mut self.data[old_len / Self::WORD_BITS];
            if value {
                *word |= unused;
            } else {
                *word &= !unused;
            }
        }
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not smaller than the current length.
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len);
        let mask = 1u64 << (index % Self::WORD_BITS);
        let word = &mut self.data[index / Self::WORD_BITS];
        if value {
            *word |= mask;
        } else {
            *word &= !mask;
        }
    }

    /// Returns the bit at `index`, or `None` if `index` is out of range.
    pub fn get(&self, index: usize) -> Option<bool> {
        if index >= self.len {
            return None;
        }
        let mask = 1u64 << (index % Self::WORD_BITS);
        Some(self.data[index / Self::WORD_BITS] & mask != 0)
    }
}
50
51impl<'data> super::WasmObject<'data> {
52 /// Tries to parse a WASM from the given slice.
53 pub fn parse(data: &'data [u8]) -> Result<Self, WasmError> {
54 let mut code_offset = 0;
55 let mut build_id = None;
56 let mut dwarf_sections = Vec::new();
57 let mut kind = ObjectKind::Debug;
58
59 // In "normal" wasm modules the only types will be function signatures, but in the future it
60 // could contain types used for module linking, but we don't actually care about the types,
61 // just that the function references a valid signature, so we just keep a bitset of the function
62 // signatures to verify that
63 let mut func_sigs = BitVec::new();
64 let features = WasmFeatures::all();
65 let mut validator = Validator::new_with_features(features);
66 let mut funcs = Vec::<Symbol>::new();
67 let mut num_imported_funcs = 0u32;
68 let mut func_allocs = FuncValidatorAllocations::default();
69
70 // Parse the wasm file to pull out the function and their starting address, size, and name
71 // Note that the order of the payloads here are the order that they will appear in (valid)
72 // wasm binaries, other than the sections that we need to parse to validate the module, which
73 // are at the end
74 for payload in wasmparser::Parser::new(0).parse_all(data) {
75 let payload = payload?;
76 match payload {
77 // The type section contains, well, types, specifically, function signatures that are
78 // later referenced by the function section.
79 Payload::TypeSection(tsr) => {
80 validator.type_section(&tsr)?;
81 func_sigs.resize(tsr.count() as usize, false);
82
83 for (i, ty) in tsr.into_iter().enumerate() {
84 let mut types = ty?.into_types();
85 let ty_is_func = matches!(
86 types.next().map(|s| s.composite_type.inner),
87 Some(CompositeInnerType::Func(_))
88 );
89 if types.next().is_none() && ty_is_func {
90 func_sigs.set(i, true);
91 }
92 }
93 }
94 // Imported functions and local functions both use the same ID space, but imported
95 // functions are never exposed, so we just need to account for the id offset later
96 // when parsing the local functions
97 Payload::ImportSection(isr) => {
98 validator.import_section(&isr)?;
99
100 for import in isr {
101 let import = import?;
102 if let TypeRef::Func(id) = import.ty {
103 if !func_sigs.get(id as usize).unwrap_or(false) {
104 return Err(WasmError::UnknownFunctionType);
105 }
106
107 num_imported_funcs += 1;
108 }
109 }
110 }
111 // The function section declares all of the local functions present in the module
112 Payload::FunctionSection(fsr) => {
113 validator.function_section(&fsr)?;
114
115 if fsr.count() > 0 {
116 kind = ObjectKind::Library;
117 }
118
119 funcs.reserve(fsr.count() as usize);
120
121 // We actually don't care about the type signature of the function, other than that
122 // they exist
123 for id in fsr {
124 if !func_sigs.get(id? as usize).unwrap_or(false) {
125 return Err(WasmError::UnknownFunctionType);
126 }
127 }
128 }
129
130 // The code section contains the actual function bodies, this payload is emitted at
131 // the beginning of the section. This one is important as the code section offset is
132 // used to calculate relative addresses in a `DwarfDebugSession`
133 Payload::CodeSectionStart { range, count, .. } => {
134 code_offset = range.start as u64;
135 validator.code_section_start(count, &range)?;
136 }
137 // We get one of these for each local function body
138 Payload::CodeSectionEntry(body) => {
139 let mut validator = validator
140 .code_section_entry(&body)?
141 .into_validator(func_allocs);
142
143 let (address, size) = get_function_info(body, &mut validator)?;
144
145 func_allocs = validator.into_allocations();
146
147 // Though we have an accurate? size of the function body, the old method of symbol
148 // iterating with walrus extends the size of each body to be contiguous with the
149 // next function, so we do the same, other than the final function
150 if let Some(prev) = funcs.last_mut() {
151 prev.size = address - prev.address;
152 }
153
154 funcs.push(Symbol {
155 name: None,
156 address,
157 size,
158 });
159 }
160
161 Payload::ModuleSection {
162 unchecked_range, ..
163 } => {
164 validator.module_section(&unchecked_range)?;
165 }
166 // There are several custom sections that we need
167 Payload::CustomSection(reader) => {
168 match reader.name() {
169 // this section is not defined yet
170 // see https://github.com/WebAssembly/tool-conventions/issues/133
171 "build_id" => {
172 build_id = Some(reader.data());
173 }
174 // All of the dwarf debug sections (.debug_frame, .debug_info etc) start with a `.`, and
175 // are the only ones we need for walking the debug info
176 debug if debug.starts_with('.') => {
177 dwarf_sections.push((debug, reader.data()));
178 }
179 // The name section contains the symbol names for items, notably functions
180 "name" => {
181 let reader =
182 BinaryReader::new(reader.data(), reader.data_offset(), features);
183 let nsr = NameSectionReader::new(reader);
184
185 for name in nsr {
186 if let wasmparser::Name::Function(fnames) = name? {
187 for fname in fnames {
188 let fname = fname?;
189
190 // The names for imported functions are also in this table, but
191 // we don't care about them
192 if fname.index >= num_imported_funcs {
193 if let Some(func) = funcs.get_mut(
194 (fname.index - num_imported_funcs) as usize,
195 ) {
196 func.name =
197 Some(std::borrow::Cow::Borrowed(fname.name));
198 }
199 }
200 }
201 }
202 }
203 }
204 _ => {}
205 }
206 }
207
208 // All other sections are not used by this crate, but some (eg table/memory/global)
209 // are needed to validate the sections that we do care about, so we just validate all
210 // of the payloads we don't use to be sure
211 payload => {
212 validator.payload(&payload)?;
213 }
214 }
215 }
216
217 Ok(Self {
218 dwarf_sections,
219 funcs,
220 build_id,
221 data,
222 code_offset,
223 kind,
224 })
225 }
226}
227
228fn get_function_info(
229 body: wasmparser::FunctionBody,
230 validator: &mut wasmparser::FuncValidator<wasmparser::ValidatorResources>,
231) -> Result<(u64, u64), WasmError> {
232 let mut body = body.get_binary_reader();
233
234 let function_address = body.original_position() as u64;
235
236 // locals, we _can_ just skip this, but might as well validate while we're here
237 {
238 for _ in 0..body.read_var_u32()? {
239 let pos = body.original_position();
240 let count = body.read()?;
241 let ty = body.read()?;
242 validator.define_locals(pos, count, ty)?;
243 }
244 }
245
246 while !body.eof() {
247 let pos = body.original_position();
248 let inst = body.read_operator()?;
249 validator.op(pos, &inst)?;
250 }
251
252 validator.finish(body.original_position())?;
253
254 Ok((
255 function_address,
256 body.original_position() as u64 - function_address,
257 ))
258}