Skip to main content

symbolic_debuginfo/wasm/
parser.rs

1//! Contains utilities for parsing a WASM module to retrieve the information needed by [`super::WasmObject`]
2
3use super::WasmError;
4use crate::base::{ObjectKind, Symbol};
5use wasmparser::{
6    BinaryReader, CompositeInnerType, FuncValidatorAllocations, NameSectionReader, Payload,
7    TypeRef, Validator, WasmFeatures,
8};
9
/// A compact, resizable sequence of booleans that packs 64 flags into each `u64` word.
#[derive(Default)]
struct BitVec {
    /// Backing words; bit `i` is stored in word `i / 64` at bit position `i % 64`.
    data: Vec<u64>,
    /// Number of addressable bits. Bits at or beyond this index are padding with no
    /// defined value.
    len: usize,
}

impl BitVec {
    /// Number of bits held by a single backing word.
    const WORD_BITS: usize = u64::BITS as usize;

    /// Creates an empty bit vector.
    pub fn new() -> Self {
        Self::default()
    }

    /// Resizes the vector to hold exactly `count` bits.
    ///
    /// Bits below both the old and the new length keep their value, and every bit that
    /// becomes newly addressable is initialized to `value`, mirroring `Vec::resize`.
    pub fn resize(&mut self, count: usize, value: bool) {
        let old_len = self.len;

        self.data.resize(
            count.div_ceil(Self::WORD_BITS),
            if value { u64::MAX } else { u64::MIN },
        );
        self.len = count;

        // `Vec::resize` only fills words that were appended. The word that contained the
        // old end of the vector may still hold padding from an earlier fill or stale bits
        // from before a shrink, so its tail has to be overwritten explicitly on growth.
        let tail_bit = old_len % Self::WORD_BITS;
        if count > old_len && tail_bit != 0 {
            let tail_mask = u64::MAX << tail_bit;
            let word = &mut self.data[old_len / Self::WORD_BITS];
            if value {
                *word |= tail_mask;
            } else {
                *word &= !tail_mask;
            }
        }
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not smaller than the current length.
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len);
        let vec_index = index / Self::WORD_BITS;
        let mask = 1u64 << (index % Self::WORD_BITS);
        if value {
            self.data[vec_index] |= mask;
        } else {
            self.data[vec_index] &= !mask;
        }
    }

    /// Returns the bit at `index`, or `None` if `index` is out of bounds.
    pub fn get(&self, index: usize) -> Option<bool> {
        if index >= self.len {
            None
        } else {
            let vec_index = index / Self::WORD_BITS;
            let mask = 1u64 << (index % Self::WORD_BITS);
            Some(self.data[vec_index] & mask != 0)
        }
    }
}
50
51impl<'data> super::WasmObject<'data> {
52    /// Tries to parse a WASM from the given slice.
53    pub fn parse(data: &'data [u8]) -> Result<Self, WasmError> {
54        let mut code_offset = 0;
55        let mut build_id = None;
56        let mut dwarf_sections = Vec::new();
57        let mut kind = ObjectKind::Debug;
58
59        // In "normal" wasm modules the only types will be function signatures, but in the future it
60        // could contain types used for module linking, but we don't actually care about the types,
61        // just that the function references a valid signature, so we just keep a bitset of the function
62        // signatures to verify that
63        let mut func_sigs = BitVec::new();
64        let features = WasmFeatures::all();
65        let mut validator = Validator::new_with_features(features);
66        let mut funcs = Vec::<Symbol>::new();
67        let mut num_imported_funcs = 0u32;
68        let mut func_allocs = FuncValidatorAllocations::default();
69
70        // Parse the wasm file to pull out the function and their starting address, size, and name
71        // Note that the order of the payloads here are the order that they will appear in (valid)
72        // wasm binaries, other than the sections that we need to parse to validate the module, which
73        // are at the end
74        for payload in wasmparser::Parser::new(0).parse_all(data) {
75            let payload = payload?;
76            match payload {
77                // The type section contains, well, types, specifically, function signatures that are
78                // later referenced by the function section.
79                Payload::TypeSection(tsr) => {
80                    validator.type_section(&tsr)?;
81                    func_sigs.resize(tsr.count() as usize, false);
82
83                    for (i, ty) in tsr.into_iter().enumerate() {
84                        let mut types = ty?.into_types();
85                        let ty_is_func = matches!(
86                            types.next().map(|s| s.composite_type.inner),
87                            Some(CompositeInnerType::Func(_))
88                        );
89                        if types.next().is_none() && ty_is_func {
90                            func_sigs.set(i, true);
91                        }
92                    }
93                }
94                // Imported functions and local functions both use the same ID space, but imported
95                // functions are never exposed, so we just need to account for the id offset later
96                // when parsing the local functions
97                Payload::ImportSection(isr) => {
98                    validator.import_section(&isr)?;
99
100                    for import in isr {
101                        let import = import?;
102                        if let TypeRef::Func(id) = import.ty {
103                            if !func_sigs.get(id as usize).unwrap_or(false) {
104                                return Err(WasmError::UnknownFunctionType);
105                            }
106
107                            num_imported_funcs += 1;
108                        }
109                    }
110                }
111                // The function section declares all of the local functions present in the module
112                Payload::FunctionSection(fsr) => {
113                    validator.function_section(&fsr)?;
114
115                    if fsr.count() > 0 {
116                        kind = ObjectKind::Library;
117                    }
118
119                    funcs.reserve(fsr.count() as usize);
120
121                    // We actually don't care about the type signature of the function, other than that
122                    // they exist
123                    for id in fsr {
124                        if !func_sigs.get(id? as usize).unwrap_or(false) {
125                            return Err(WasmError::UnknownFunctionType);
126                        }
127                    }
128                }
129
130                // The code section contains the actual function bodies, this payload is emitted at
131                // the beginning of the section. This one is important as the code section offset is
132                // used to calculate relative addresses in a `DwarfDebugSession`
133                Payload::CodeSectionStart { range, .. } => {
134                    code_offset = range.start as u64;
135                    validator.code_section_start(&range)?;
136                }
137                // We get one of these for each local function body
138                Payload::CodeSectionEntry(body) => {
139                    let mut validator = validator
140                        .code_section_entry(&body)?
141                        .into_validator(func_allocs);
142
143                    let (address, size) = get_function_info(body, &mut validator)?;
144
145                    func_allocs = validator.into_allocations();
146
147                    // Though we have an accurate? size of the function body, the old method of symbol
148                    // iterating with walrus extends the size of each body to be contiguous with the
149                    // next function, so we do the same, other than the final function
150                    if let Some(prev) = funcs.last_mut() {
151                        prev.size = address - prev.address;
152                    }
153
154                    funcs.push(Symbol {
155                        name: None,
156                        address,
157                        size,
158                    });
159                }
160
161                Payload::ModuleSection {
162                    unchecked_range, ..
163                } => {
164                    validator.module_section(&unchecked_range)?;
165                }
166                // There are several custom sections that we need
167                Payload::CustomSection(reader) => {
168                    match reader.name() {
169                        // this section is not defined yet
170                        // see https://github.com/WebAssembly/tool-conventions/issues/133
171                        "build_id" => {
172                            build_id = Some(reader.data());
173                        }
174                        // All of the dwarf debug sections (.debug_frame, .debug_info etc) start with a `.`, and
175                        // are the only ones we need for walking the debug info
176                        debug if debug.starts_with('.') => {
177                            dwarf_sections.push((debug, reader.data()));
178                        }
179                        // The name section contains the symbol names for items, notably functions
180                        "name" => {
181                            let reader = BinaryReader::new_features(
182                                reader.data(),
183                                reader.data_offset(),
184                                features,
185                            );
186                            let nsr = NameSectionReader::new(reader);
187
188                            for name in nsr {
189                                if let wasmparser::Name::Function(fnames) = name? {
190                                    for fname in fnames {
191                                        let fname = fname?;
192
193                                        // The names for imported functions are also in this table, but
194                                        // we don't care about them
195                                        if fname.index >= num_imported_funcs {
196                                            if let Some(func) = funcs.get_mut(
197                                                (fname.index - num_imported_funcs) as usize,
198                                            ) {
199                                                func.name =
200                                                    Some(std::borrow::Cow::Borrowed(fname.name));
201                                            }
202                                        }
203                                    }
204                                }
205                            }
206                        }
207                        _ => {}
208                    }
209                }
210
211                // All other sections are not used by this crate, but some (eg table/memory/global)
212                // are needed to validate the sections that we do care about, so we just validate all
213                // of the payloads we don't use to be sure
214                payload => {
215                    validator.payload(&payload)?;
216                }
217            }
218        }
219
220        Ok(Self {
221            dwarf_sections,
222            funcs,
223            build_id,
224            data,
225            code_offset,
226            kind,
227        })
228    }
229}
230
231fn get_function_info(
232    body: wasmparser::FunctionBody,
233    validator: &mut wasmparser::FuncValidator<wasmparser::ValidatorResources>,
234) -> Result<(u64, u64), WasmError> {
235    let mut locals_reader = body.get_binary_reader();
236    let function_address = locals_reader.original_position() as u64;
237
238    // locals, we _can_ just skip this, but might as well validate while we're here
239    {
240        for _ in 0..locals_reader.read_var_u32()? {
241            let pos = locals_reader.original_position();
242            let count = locals_reader.read()?;
243            let ty = locals_reader.read()?;
244            validator.define_locals(pos, count, ty)?;
245        }
246    }
247
248    let mut operators_reader = body.get_operators_reader()?;
249    while !operators_reader.eof() {
250        let pos = operators_reader.original_position();
251        let inst = operators_reader.read()?;
252        validator.op(pos, &inst)?;
253    }
254
255    operators_reader.finish()?;
256
257    Ok((
258        function_address,
259        operators_reader.original_position() as u64 - function_address,
260    ))
261}