multiversx_sc_meta_lib/tools/wasm_extractor/
extractor.rs

1use colored::Colorize;
2use std::{
3    collections::{HashMap, HashSet},
4    fs,
5    path::{Path, PathBuf},
6};
7use wasmparser::{
8    BinaryReaderError, CompositeInnerType, DataSectionReader, ElementItems, ElementSectionReader,
9    ExportSectionReader, FunctionBody, ImportSectionReader, Operator, Parser, Payload, TypeRef,
10    TypeSectionReader, ValType,
11};
12
13use crate::{ei::EIVersion, tools::CodeReport};
14
15use super::{
16    opcode_whitelist::is_opcode_whitelisted, report::WasmReport, CallGraph, EndpointInfo,
17    FunctionInfo, OpcodeVersion,
18};
19
/// Byte pattern looked up in the data section of the contract; finding it is
/// treated as a sign that the fail allocator's error message was compiled in.
const ERROR_FAIL_ALLOCATOR: &[u8; 27] = b"memory allocation forbidden";
/// Names of imported functions that are treated as storage writes when
/// checking whether read-only endpoints can reach a write operation.
const WRITE_OP: &[&str] = &[
    "mBufferStorageStore",
    "storageStore",
    "int64storageStore",
    "bigIntStorageStoreUnsigned",
    "smallIntStorageStoreUnsigned",
    "smallIntStorageStoreSigned",
];
29
/// Everything gathered while analysing a single `.wasm` binary.
#[derive(Default, Debug, Clone)]
pub struct WasmInfo {
    /// Functions, endpoints and table entries discovered in the binary.
    pub call_graph: CallGraph,
    /// Indexes of functions considered to write to storage: the imports named
    /// in `WRITE_OP`, plus the functions found to call them.
    pub write_index_functions: HashSet<usize>,
    /// The report that is handed back to the caller once the analysis is done.
    pub report: WasmReport,
    /// Raw bytes of the `.wasm` binary being analysed.
    pub data: Vec<u8>,
    /// Function signatures collected from the type section, keyed by the index
    /// assigned while reading that section.
    pub func_types: HashMap<u32, FunctionType>,
}
38
/// Signature of a function type declared in the type section.
#[derive(Debug, Clone)]
pub struct FunctionType {
    /// Value types of the parameters, in declaration order.
    pub params: Vec<ValType>,
    /// Value types of the results, in declaration order.
    pub returns: Vec<ValType>,
}
44
45impl WasmInfo {
46    pub fn extract_wasm_report(
47        output_wasm_path: &PathBuf,
48        extract_imports_enabled: bool,
49        check_ei: Option<&EIVersion>,
50        endpoints: &HashMap<&str, bool>,
51        opcode_version: OpcodeVersion,
52    ) -> WasmReport {
53        let wasm_data = fs::read(output_wasm_path)
54            .expect("error occurred while extracting information from .wasm: file not found");
55
56        let wasm_info = WasmInfo::default()
57            .add_endpoints(endpoints)
58            .add_path(output_wasm_path)
59            .add_wasm_data(&wasm_data)
60            .populate_wasm_info(extract_imports_enabled, check_ei, opcode_version)
61            .expect("error occurred while extracting information from .wasm file");
62
63        wasm_info.report
64    }
65
66    pub(crate) fn populate_wasm_info(
67        self,
68        import_extraction_enabled: bool,
69        check_ei: Option<&EIVersion>,
70        opcode_version: OpcodeVersion,
71    ) -> Result<WasmInfo, BinaryReaderError> {
72        let parser = Parser::new(0);
73        let mut wasm_info = self.clone();
74
75        for payload in parser.parse_all(&self.data) {
76            match payload? {
77                Payload::TypeSection(type_section) => {
78                    wasm_info.parse_type_section(type_section);
79                }
80                Payload::ImportSection(import_section) => {
81                    wasm_info.process_imports(import_section, import_extraction_enabled);
82                    wasm_info.report.ei_check |= is_ei_valid(&wasm_info.report.imports, check_ei);
83                }
84                Payload::DataSection(data_section) => {
85                    wasm_info.report.code.has_allocator |=
86                        is_fail_allocator_triggered(data_section.clone());
87                    wasm_info.report.code.has_panic.max_severity(data_section);
88                }
89                Payload::CodeSectionEntry(code_section) => {
90                    wasm_info.report.memory_grow_flag |= is_mem_grow(&code_section);
91                    wasm_info.create_call_graph(code_section, opcode_version);
92                }
93                Payload::ExportSection(export_section) => {
94                    wasm_info.parse_export_section(export_section);
95                }
96                Payload::ElementSection(elem_section) => {
97                    wasm_info.parse_element_section(elem_section);
98                }
99                _ => {}
100            }
101        }
102
103        wasm_info
104            .call_graph
105            .populate_accessible_from_function_indexes();
106        wasm_info
107            .call_graph
108            .populate_accessible_from_call_indirect();
109        wasm_info.call_graph.populate_function_endpoints();
110        wasm_info
111            .call_graph
112            .populate_call_indirect_accessible_from_endpoints();
113
114        wasm_info.detect_write_operations_in_views();
115        wasm_info.detect_forbidden_opcodes();
116
117        Ok(wasm_info)
118    }
119
120    fn parse_type_section(&mut self, type_section: TypeSectionReader) {
121        for (ty_index, ty_result) in type_section.into_iter().enumerate() {
122            let rec_group = ty_result.expect("Failed to read type section");
123            for sub_type in rec_group.into_types() {
124                if let CompositeInnerType::Func(func_ty) = sub_type.composite_type.inner {
125                    let ft = FunctionType {
126                        params: func_ty.params().to_vec(),
127                        returns: func_ty.results().to_vec(),
128                    };
129                    self.func_types.insert(ty_index as u32, ft);
130                }
131            }
132        }
133    }
134
135    pub(crate) fn add_endpoints(mut self, endpoints: &HashMap<&str, bool>) -> Self {
136        for (name, readonly) in endpoints {
137            self.call_graph
138                .endpoints
139                .insert(name.to_string(), EndpointInfo::default(*readonly));
140        }
141
142        self
143    }
144
145    pub(crate) fn add_wasm_data(self, data: &[u8]) -> Self {
146        WasmInfo {
147            data: data.to_vec(),
148            ..self
149        }
150    }
151
152    fn add_path(self, path: &Path) -> Self {
153        WasmInfo {
154            report: WasmReport {
155                code: CodeReport {
156                    path: path.to_path_buf(),
157                    ..self.report.code
158                },
159                ..self.report
160            },
161            ..self
162        }
163    }
164
165    fn create_call_graph(&mut self, body: FunctionBody, opcode_version: OpcodeVersion) {
166        let mut instructions_reader = body
167            .get_operators_reader()
168            .expect("Failed to get operators reader");
169
170        let mut function_info = FunctionInfo::new();
171        let function_index = self.call_graph.next_function_index();
172        while let Ok(op) = instructions_reader.read() {
173            match op {
174                Operator::Call { function_index } => {
175                    function_info.add_called_function(function_index as usize);
176                }
177                Operator::CallIndirect { .. } => {
178                    function_info.contains_call_indirect = true;
179                }
180                _ => {}
181            }
182
183            if !is_opcode_whitelisted(&op, opcode_version) {
184                let opcode = extract_opcode(op);
185                function_info.add_forbidden_opcode(opcode);
186            }
187        }
188
189        self.call_graph
190            .insert_function(function_index, function_info);
191    }
192
193    fn process_imports(
194        &mut self,
195        import_section: ImportSectionReader,
196        import_extraction_enabled: bool,
197    ) {
198        let signature_map = crate::ei::vm_hook_signature_map();
199
200        for (index, import) in import_section.into_iter().flatten().enumerate() {
201            if let TypeRef::Func(type_index) = &import.ty {
202                let func_type = self
203                    .func_types
204                    .get(type_index)
205                    .expect("invalid wasm function type index");
206
207                crate::ei::check_vm_hook_signatures(
208                    import.name,
209                    &func_type.params,
210                    &func_type.returns,
211                    &signature_map,
212                );
213                if import_extraction_enabled {
214                    self.report.imports.push(import.name.to_string());
215                }
216                self.call_graph.insert_function(index, FunctionInfo::new());
217                if WRITE_OP.contains(&import.name) {
218                    self.write_index_functions.insert(index);
219                }
220            }
221        }
222
223        self.report.imports.sort();
224    }
225
226    fn detect_write_operations_in_views(&mut self) {
227        let mut visited: HashSet<usize> = HashSet::new();
228
229        for index in get_view_endpoints_indexes(&self.call_graph.endpoints) {
230            mark_write(self, index, &mut visited);
231        }
232
233        for (name, index) in get_view_endpoints(&self.call_graph.endpoints) {
234            if self.write_index_functions.contains(&index) {
235                println!(
236                    "{} {}",
237                    "Write storage operation in VIEW endpoint:"
238                        .to_string()
239                        .red()
240                        .bold(),
241                    name.red().bold()
242                );
243            }
244        }
245    }
246
247    fn detect_forbidden_opcodes(&mut self) {
248        for (&func_index, func_info) in &self.call_graph.function_map {
249            if func_info.forbidden_opcodes.is_empty() {
250                continue;
251            }
252
253            let opcodes = func_info
254                .forbidden_opcodes
255                .iter()
256                .cloned()
257                .collect::<Vec<String>>()
258                .join(", ");
259            let mut message =
260                format!("Forbidden opcodes detected in function {func_index}: {opcodes}.");
261
262            let endpoints = self
263                .call_graph
264                .function_accessible_from_endpoints(func_index);
265            if !endpoints.is_empty() {
266                message.push_str(&format!(
267                    " This function is accessible endpoints: {}.",
268                    endpoints
269                        .iter()
270                        .cloned()
271                        .collect::<Vec<String>>()
272                        .join(", ")
273                ));
274            }
275            for endpoint in endpoints {
276                for forbidden_opcode in &func_info.forbidden_opcodes {
277                    self.report.add_forbidden_opcode_accessible_from_endpoint(
278                        endpoint.clone(),
279                        forbidden_opcode.clone(),
280                    );
281                }
282            }
283
284            if func_info.accessible_from_call_indirect {
285                for endpoint in &self.call_graph.call_indirect_accessible_from_endpoints {
286                    for forbidden_opcode in &func_info.forbidden_opcodes {
287                        self.report.add_forbidden_opcode_accessible_from_endpoint(
288                            endpoint.clone(),
289                            forbidden_opcode.clone(),
290                        );
291                    }
292                }
293                message.push_str(&format!(
294                    " This function is accessible via call_indirect, from endpoints: {}.",
295                    self.call_graph
296                        .call_indirect_accessible_from_endpoints
297                        .iter()
298                        .cloned()
299                        .collect::<Vec<String>>()
300                        .join(", ")
301                ));
302            }
303
304            println!("{}", message.red().bold());
305        }
306    }
307
308    fn parse_export_section(&mut self, export_section: ExportSectionReader) {
309        if self.call_graph.endpoints.is_empty() {
310            return;
311        }
312
313        for export in export_section {
314            let export = export.expect("Failed to read export section");
315            if wasmparser::ExternalKind::Func == export.kind {
316                if let Some(endpoint) = self.call_graph.endpoints.get_mut(export.name) {
317                    endpoint.set_index(export.index.try_into().unwrap());
318                }
319            }
320        }
321    }
322
323    fn parse_element_section(&mut self, element_section: ElementSectionReader) {
324        for t in element_section.into_iter() {
325            let element = t.expect("Failed to read table section");
326
327            if let ElementItems::Functions(functions) = element.items {
328                for func_result in functions {
329                    let function_index =
330                        func_result.expect("Failed to read function index in element section");
331                    self.call_graph
332                        .table_functions
333                        .push(function_index as usize);
334                }
335            }
336        }
337    }
338}
339
340pub(crate) fn get_view_endpoints_indexes(endpoints: &HashMap<String, EndpointInfo>) -> Vec<usize> {
341    endpoints
342        .values()
343        .filter(|endpoint_info| endpoint_info.readonly)
344        .map(|endpoint_info| endpoint_info.index)
345        .collect()
346}
347
348pub(crate) fn get_view_endpoints(
349    endpoints: &HashMap<String, EndpointInfo>,
350) -> HashMap<&str, usize> {
351    let mut view_endpoints = HashMap::new();
352
353    for (name, endpoint_info) in endpoints {
354        if endpoint_info.readonly {
355            view_endpoints.insert(name.as_str(), endpoint_info.index);
356        }
357    }
358
359    view_endpoints
360}
361
362fn is_fail_allocator_triggered(data_section: DataSectionReader) -> bool {
363    for data_fragment in data_section.into_iter().flatten() {
364        if data_fragment
365            .data
366            .windows(ERROR_FAIL_ALLOCATOR.len())
367            .any(|data| data == ERROR_FAIL_ALLOCATOR)
368        {
369            println!(
370                "{}",
371                "FailAllocator used while memory allocation is accessible in code. Contract may fail unexpectedly when memory allocation is attempted"
372                    .to_string()
373                    .red()
374                    .bold()
375            );
376            return true;
377        }
378    }
379
380    false
381}
382
383fn mark_write(wasm_info: &mut WasmInfo, func: usize, visited: &mut HashSet<usize>) {
384    // Return early to prevent cycles.
385    if visited.contains(&func) {
386        return;
387    }
388
389    visited.insert(func);
390
391    let callees: Vec<usize> = if let Some(callees) = wasm_info.call_graph.function_map.get(&func) {
392        callees.called_function_indexes.iter().cloned().collect()
393    } else {
394        return;
395    };
396
397    for callee in callees {
398        if wasm_info.write_index_functions.contains(&callee) {
399            wasm_info.write_index_functions.insert(func);
400        } else {
401            mark_write(wasm_info, callee, visited);
402            if wasm_info.write_index_functions.contains(&callee) {
403                wasm_info.write_index_functions.insert(func);
404            }
405        }
406    }
407}
408
409fn is_ei_valid(imports: &[String], check_ei: Option<&EIVersion>) -> bool {
410    if let Some(ei) = check_ei {
411        let mut num_errors = 0;
412        for import in imports {
413            if !ei.contains_vm_hook(import.as_str()) {
414                num_errors += 1;
415            }
416        }
417
418        if num_errors == 0 {
419            return true;
420        }
421    }
422
423    false
424}
425
426fn is_mem_grow(code_section: &FunctionBody) -> bool {
427    let mut instructions_reader = code_section
428        .get_operators_reader()
429        .expect("Failed to get operators reader");
430
431    while let Ok(op) = instructions_reader.read() {
432        if let Operator::MemoryGrow { mem: _ } = op {
433            return true;
434        }
435    }
436
437    false
438}
439
440fn extract_opcode(op: Operator) -> String {
441    let op_str = format!("{:?}", op);
442    let op_vec: Vec<&str> = op_str.split_whitespace().collect();
443
444    op_vec[0].to_owned()
445}