symbolic_debuginfo/wasm/
parser.rs

1//! Contains utilities for parsing a WASM module to retrieve the information needed by [`super::WasmObject`]
2
3use super::WasmError;
4use crate::base::{ObjectKind, Symbol};
5use wasmparser::{
6    BinaryReader, CompositeInnerType, FuncValidatorAllocations, NameSectionReader, Payload,
7    TypeRef, Validator, WasmFeatures,
8};
9
/// A minimal growable bit set that packs one flag per bit into `u64` words.
///
/// Bit `i` is stored in `data[i / 64]` at bit position `i % 64`. Only the first `len` bits are
/// observable; any storage bits at or beyond `len` are unspecified and are re-initialized when
/// the vector grows.
#[derive(Debug, Default)]
struct BitVec {
    /// Backing words holding the packed bits.
    data: Vec<u64>,
    /// Number of addressable bits.
    len: usize,
}

impl BitVec {
    /// Number of bits stored in each backing word.
    const WORD_BITS: usize = u64::BITS as usize;

    /// Creates an empty bit vector.
    pub fn new() -> Self {
        Self::default()
    }

    /// Resizes the vector to hold exactly `count` bits.
    ///
    /// Mirrors `Vec::resize`: when growing, every newly exposed bit in `[old_len, count)` is set
    /// to `value`; when shrinking, the vector is truncated and the remaining bits are untouched.
    pub fn resize(&mut self, count: usize, value: bool) {
        let old_len = self.len;

        self.data.resize(
            count.div_ceil(Self::WORD_BITS),
            if value { u64::MAX } else { u64::MIN },
        );
        self.len = count;

        // `Vec::resize` only initializes whole words that were newly appended. The word that
        // contained the previous end of the vector may still carry stale bits past `old_len`
        // (left over from an earlier fill value or from bits set before a shrink), so the tail
        // of that word has to be overwritten explicitly.
        let tail = old_len % Self::WORD_BITS;
        if count > old_len && tail != 0 {
            let mask = u64::MAX << tail;
            let word = &mut self.data[old_len / Self::WORD_BITS];
            if value {
                *word |= mask;
            } else {
                *word &= !mask;
            }
        }
    }

    /// Sets the bit at `index` to `value`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not smaller than the current length.
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len);
        let word = &mut self.data[index / Self::WORD_BITS];
        let mask = 1u64 << (index % Self::WORD_BITS);
        if value {
            *word |= mask;
        } else {
            *word &= !mask;
        }
    }

    /// Returns the bit at `index`, or `None` if `index` is out of bounds.
    pub fn get(&self, index: usize) -> Option<bool> {
        if index >= self.len {
            return None;
        }
        let word = self.data[index / Self::WORD_BITS];
        let mask = 1u64 << (index % Self::WORD_BITS);
        Some(word & mask != 0)
    }
}
50
51impl<'data> super::WasmObject<'data> {
52    /// Tries to parse a WASM from the given slice.
53    pub fn parse(data: &'data [u8]) -> Result<Self, WasmError> {
54        let mut code_offset = 0;
55        let mut build_id = None;
56        let mut dwarf_sections = Vec::new();
57        let mut kind = ObjectKind::Debug;
58
59        // In "normal" wasm modules the only types will be function signatures, but in the future it
60        // could contain types used for module linking, but we don't actually care about the types,
61        // just that the function references a valid signature, so we just keep a bitset of the function
62        // signatures to verify that
63        let mut func_sigs = BitVec::new();
64        let features = WasmFeatures::all();
65        let mut validator = Validator::new_with_features(features);
66        let mut funcs = Vec::<Symbol>::new();
67        let mut num_imported_funcs = 0u32;
68        let mut func_allocs = FuncValidatorAllocations::default();
69
70        // Parse the wasm file to pull out the function and their starting address, size, and name
71        // Note that the order of the payloads here are the order that they will appear in (valid)
72        // wasm binaries, other than the sections that we need to parse to validate the module, which
73        // are at the end
74        for payload in wasmparser::Parser::new(0).parse_all(data) {
75            let payload = payload?;
76            match payload {
77                // The type section contains, well, types, specifically, function signatures that are
78                // later referenced by the function section.
79                Payload::TypeSection(tsr) => {
80                    validator.type_section(&tsr)?;
81                    func_sigs.resize(tsr.count() as usize, false);
82
83                    for (i, ty) in tsr.into_iter().enumerate() {
84                        let mut types = ty?.into_types();
85                        let ty_is_func = matches!(
86                            types.next().map(|s| s.composite_type.inner),
87                            Some(CompositeInnerType::Func(_))
88                        );
89                        if types.next().is_none() && ty_is_func {
90                            func_sigs.set(i, true);
91                        }
92                    }
93                }
94                // Imported functions and local functions both use the same ID space, but imported
95                // functions are never exposed, so we just need to account for the id offset later
96                // when parsing the local functions
97                Payload::ImportSection(isr) => {
98                    validator.import_section(&isr)?;
99
100                    for import in isr {
101                        let import = import?;
102                        if let TypeRef::Func(id) = import.ty {
103                            if !func_sigs.get(id as usize).unwrap_or(false) {
104                                return Err(WasmError::UnknownFunctionType);
105                            }
106
107                            num_imported_funcs += 1;
108                        }
109                    }
110                }
111                // The function section declares all of the local functions present in the module
112                Payload::FunctionSection(fsr) => {
113                    validator.function_section(&fsr)?;
114
115                    if fsr.count() > 0 {
116                        kind = ObjectKind::Library;
117                    }
118
119                    funcs.reserve(fsr.count() as usize);
120
121                    // We actually don't care about the type signature of the function, other than that
122                    // they exist
123                    for id in fsr {
124                        if !func_sigs.get(id? as usize).unwrap_or(false) {
125                            return Err(WasmError::UnknownFunctionType);
126                        }
127                    }
128                }
129
130                // The code section contains the actual function bodies, this payload is emitted at
131                // the beginning of the section. This one is important as the code section offset is
132                // used to calculate relative addresses in a `DwarfDebugSession`
133                Payload::CodeSectionStart { range, count, .. } => {
134                    code_offset = range.start as u64;
135                    validator.code_section_start(count, &range)?;
136                }
137                // We get one of these for each local function body
138                Payload::CodeSectionEntry(body) => {
139                    let mut validator = validator
140                        .code_section_entry(&body)?
141                        .into_validator(func_allocs);
142
143                    let (address, size) = get_function_info(body, &mut validator)?;
144
145                    func_allocs = validator.into_allocations();
146
147                    // Though we have an accurate? size of the function body, the old method of symbol
148                    // iterating with walrus extends the size of each body to be contiguous with the
149                    // next function, so we do the same, other than the final function
150                    if let Some(prev) = funcs.last_mut() {
151                        prev.size = address - prev.address;
152                    }
153
154                    funcs.push(Symbol {
155                        name: None,
156                        address,
157                        size,
158                    });
159                }
160
161                Payload::ModuleSection {
162                    unchecked_range, ..
163                } => {
164                    validator.module_section(&unchecked_range)?;
165                }
166                // There are several custom sections that we need
167                Payload::CustomSection(reader) => {
168                    match reader.name() {
169                        // this section is not defined yet
170                        // see https://github.com/WebAssembly/tool-conventions/issues/133
171                        "build_id" => {
172                            build_id = Some(reader.data());
173                        }
174                        // All of the dwarf debug sections (.debug_frame, .debug_info etc) start with a `.`, and
175                        // are the only ones we need for walking the debug info
176                        debug if debug.starts_with('.') => {
177                            dwarf_sections.push((debug, reader.data()));
178                        }
179                        // The name section contains the symbol names for items, notably functions
180                        "name" => {
181                            let reader =
182                                BinaryReader::new(reader.data(), reader.data_offset(), features);
183                            let nsr = NameSectionReader::new(reader);
184
185                            for name in nsr {
186                                if let wasmparser::Name::Function(fnames) = name? {
187                                    for fname in fnames {
188                                        let fname = fname?;
189
190                                        // The names for imported functions are also in this table, but
191                                        // we don't care about them
192                                        if fname.index >= num_imported_funcs {
193                                            if let Some(func) = funcs.get_mut(
194                                                (fname.index - num_imported_funcs) as usize,
195                                            ) {
196                                                func.name =
197                                                    Some(std::borrow::Cow::Borrowed(fname.name));
198                                            }
199                                        }
200                                    }
201                                }
202                            }
203                        }
204                        _ => {}
205                    }
206                }
207
208                // All other sections are not used by this crate, but some (eg table/memory/global)
209                // are needed to validate the sections that we do care about, so we just validate all
210                // of the payloads we don't use to be sure
211                payload => {
212                    validator.payload(&payload)?;
213                }
214            }
215        }
216
217        Ok(Self {
218            dwarf_sections,
219            funcs,
220            build_id,
221            data,
222            code_offset,
223            kind,
224        })
225    }
226}
227
228fn get_function_info(
229    body: wasmparser::FunctionBody,
230    validator: &mut wasmparser::FuncValidator<wasmparser::ValidatorResources>,
231) -> Result<(u64, u64), WasmError> {
232    let mut body = body.get_binary_reader();
233
234    let function_address = body.original_position() as u64;
235
236    // locals, we _can_ just skip this, but might as well validate while we're here
237    {
238        for _ in 0..body.read_var_u32()? {
239            let pos = body.original_position();
240            let count = body.read()?;
241            let ty = body.read()?;
242            validator.define_locals(pos, count, ty)?;
243        }
244    }
245
246    while !body.eof() {
247        let pos = body.original_position();
248        let inst = body.read_operator()?;
249        validator.op(pos, &inst)?;
250    }
251
252    validator.finish(body.original_position())?;
253
254    Ok((
255        function_address,
256        body.original_position() as u64 - function_address,
257    ))
258}