multiversx_sc_meta_lib/tools/wasm_extractor/
extractor.rs1use colored::Colorize;
2use std::{
3 collections::{HashMap, HashSet},
4 fs,
5 path::{Path, PathBuf},
6};
7use wasmparser::{
8 BinaryReaderError, CompositeInnerType, DataSectionReader, ElementItems, ElementSectionReader,
9 ExportSectionReader, FunctionBody, ImportSectionReader, Operator, Parser, Payload, TypeRef,
10 TypeSectionReader, ValType,
11};
12
13use crate::{ei::EIVersion, tools::CodeReport};
14
15use super::{
16 opcode_whitelist::is_opcode_whitelisted, report::WasmReport, CallGraph, EndpointInfo,
17 FunctionInfo, OpcodeVersion,
18};
19
/// Byte pattern searched for in the data section by `is_fail_allocator_triggered`;
/// finding it sets the `has_allocator` flag of the report.
const ERROR_FAIL_ALLOCATOR: &[u8; 27] = b"memory allocation forbidden";
/// Names of imported functions that are treated as storage writes.
///
/// An import whose name appears here has its function index recorded in
/// `WasmInfo::write_index_functions` while the import section is processed.
const WRITE_OP: &[&str] = &[
    "mBufferStorageStore",
    "storageStore",
    "int64storageStore",
    "bigIntStorageStoreUnsigned",
    "smallIntStorageStoreUnsigned",
    "smallIntStorageStoreSigned",
];
29
30#[derive(Default, Debug, Clone)]
31pub struct WasmInfo {
32 pub call_graph: CallGraph,
33 pub write_index_functions: HashSet<usize>,
34 pub report: WasmReport,
35 pub data: Vec<u8>,
36 pub func_types: HashMap<u32, FunctionType>,
37}
38
39#[derive(Debug, Clone)]
40pub struct FunctionType {
41 pub params: Vec<ValType>,
42 pub returns: Vec<ValType>,
43}
44
45impl WasmInfo {
46 pub fn extract_wasm_report(
47 output_wasm_path: &PathBuf,
48 extract_imports_enabled: bool,
49 check_ei: Option<&EIVersion>,
50 endpoints: &HashMap<&str, bool>,
51 opcode_version: OpcodeVersion,
52 ) -> WasmReport {
53 let wasm_data = fs::read(output_wasm_path)
54 .expect("error occurred while extracting information from .wasm: file not found");
55
56 let wasm_info = WasmInfo::default()
57 .add_endpoints(endpoints)
58 .add_path(output_wasm_path)
59 .add_wasm_data(&wasm_data)
60 .populate_wasm_info(extract_imports_enabled, check_ei, opcode_version)
61 .expect("error occurred while extracting information from .wasm file");
62
63 wasm_info.report
64 }
65
66 pub(crate) fn populate_wasm_info(
67 self,
68 import_extraction_enabled: bool,
69 check_ei: Option<&EIVersion>,
70 opcode_version: OpcodeVersion,
71 ) -> Result<WasmInfo, BinaryReaderError> {
72 let parser = Parser::new(0);
73 let mut wasm_info = self.clone();
74
75 for payload in parser.parse_all(&self.data) {
76 match payload? {
77 Payload::TypeSection(type_section) => {
78 wasm_info.parse_type_section(type_section);
79 }
80 Payload::ImportSection(import_section) => {
81 wasm_info.process_imports(import_section, import_extraction_enabled);
82 wasm_info.report.ei_check |= is_ei_valid(&wasm_info.report.imports, check_ei);
83 }
84 Payload::DataSection(data_section) => {
85 wasm_info.report.code.has_allocator |=
86 is_fail_allocator_triggered(data_section.clone());
87 wasm_info.report.code.has_panic.max_severity(data_section);
88 }
89 Payload::CodeSectionEntry(code_section) => {
90 wasm_info.report.memory_grow_flag |= is_mem_grow(&code_section);
91 wasm_info.create_call_graph(code_section, opcode_version);
92 }
93 Payload::ExportSection(export_section) => {
94 wasm_info.parse_export_section(export_section);
95 }
96 Payload::ElementSection(elem_section) => {
97 wasm_info.parse_element_section(elem_section);
98 }
99 _ => {}
100 }
101 }
102
103 wasm_info
104 .call_graph
105 .populate_accessible_from_function_indexes();
106 wasm_info
107 .call_graph
108 .populate_accessible_from_call_indirect();
109 wasm_info.call_graph.populate_function_endpoints();
110 wasm_info
111 .call_graph
112 .populate_call_indirect_accessible_from_endpoints();
113
114 wasm_info.detect_write_operations_in_views();
115 wasm_info.detect_forbidden_opcodes();
116
117 Ok(wasm_info)
118 }
119
120 fn parse_type_section(&mut self, type_section: TypeSectionReader) {
121 for (ty_index, ty_result) in type_section.into_iter().enumerate() {
122 let rec_group = ty_result.expect("Failed to read type section");
123 for sub_type in rec_group.into_types() {
124 if let CompositeInnerType::Func(func_ty) = sub_type.composite_type.inner {
125 let ft = FunctionType {
126 params: func_ty.params().to_vec(),
127 returns: func_ty.results().to_vec(),
128 };
129 self.func_types.insert(ty_index as u32, ft);
130 }
131 }
132 }
133 }
134
135 pub(crate) fn add_endpoints(mut self, endpoints: &HashMap<&str, bool>) -> Self {
136 for (name, readonly) in endpoints {
137 self.call_graph
138 .endpoints
139 .insert(name.to_string(), EndpointInfo::default(*readonly));
140 }
141
142 self
143 }
144
145 pub(crate) fn add_wasm_data(self, data: &[u8]) -> Self {
146 WasmInfo {
147 data: data.to_vec(),
148 ..self
149 }
150 }
151
152 fn add_path(self, path: &Path) -> Self {
153 WasmInfo {
154 report: WasmReport {
155 code: CodeReport {
156 path: path.to_path_buf(),
157 ..self.report.code
158 },
159 ..self.report
160 },
161 ..self
162 }
163 }
164
165 fn create_call_graph(&mut self, body: FunctionBody, opcode_version: OpcodeVersion) {
166 let mut instructions_reader = body
167 .get_operators_reader()
168 .expect("Failed to get operators reader");
169
170 let mut function_info = FunctionInfo::new();
171 let function_index = self.call_graph.next_function_index();
172 while let Ok(op) = instructions_reader.read() {
173 match op {
174 Operator::Call { function_index } => {
175 function_info.add_called_function(function_index as usize);
176 }
177 Operator::CallIndirect { .. } => {
178 function_info.contains_call_indirect = true;
179 }
180 _ => {}
181 }
182
183 if !is_opcode_whitelisted(&op, opcode_version) {
184 let opcode = extract_opcode(op);
185 function_info.add_forbidden_opcode(opcode);
186 }
187 }
188
189 self.call_graph
190 .insert_function(function_index, function_info);
191 }
192
193 fn process_imports(
194 &mut self,
195 import_section: ImportSectionReader,
196 import_extraction_enabled: bool,
197 ) {
198 let signature_map = crate::ei::vm_hook_signature_map();
199
200 for (index, import) in import_section.into_iter().flatten().enumerate() {
201 if let TypeRef::Func(type_index) = &import.ty {
202 let func_type = self
203 .func_types
204 .get(type_index)
205 .expect("invalid wasm function type index");
206
207 crate::ei::check_vm_hook_signatures(
208 import.name,
209 &func_type.params,
210 &func_type.returns,
211 &signature_map,
212 );
213 if import_extraction_enabled {
214 self.report.imports.push(import.name.to_string());
215 }
216 self.call_graph.insert_function(index, FunctionInfo::new());
217 if WRITE_OP.contains(&import.name) {
218 self.write_index_functions.insert(index);
219 }
220 }
221 }
222
223 self.report.imports.sort();
224 }
225
226 fn detect_write_operations_in_views(&mut self) {
227 let mut visited: HashSet<usize> = HashSet::new();
228
229 for index in get_view_endpoints_indexes(&self.call_graph.endpoints) {
230 mark_write(self, index, &mut visited);
231 }
232
233 for (name, index) in get_view_endpoints(&self.call_graph.endpoints) {
234 if self.write_index_functions.contains(&index) {
235 println!(
236 "{} {}",
237 "Write storage operation in VIEW endpoint:"
238 .to_string()
239 .red()
240 .bold(),
241 name.red().bold()
242 );
243 }
244 }
245 }
246
247 fn detect_forbidden_opcodes(&mut self) {
248 for (&func_index, func_info) in &self.call_graph.function_map {
249 if func_info.forbidden_opcodes.is_empty() {
250 continue;
251 }
252
253 let opcodes = func_info
254 .forbidden_opcodes
255 .iter()
256 .cloned()
257 .collect::<Vec<String>>()
258 .join(", ");
259 let mut message =
260 format!("Forbidden opcodes detected in function {func_index}: {opcodes}.");
261
262 let endpoints = self
263 .call_graph
264 .function_accessible_from_endpoints(func_index);
265 if !endpoints.is_empty() {
266 message.push_str(&format!(
267 " This function is accessible endpoints: {}.",
268 endpoints
269 .iter()
270 .cloned()
271 .collect::<Vec<String>>()
272 .join(", ")
273 ));
274 }
275 for endpoint in endpoints {
276 for forbidden_opcode in &func_info.forbidden_opcodes {
277 self.report.add_forbidden_opcode_accessible_from_endpoint(
278 endpoint.clone(),
279 forbidden_opcode.clone(),
280 );
281 }
282 }
283
284 if func_info.accessible_from_call_indirect {
285 for endpoint in &self.call_graph.call_indirect_accessible_from_endpoints {
286 for forbidden_opcode in &func_info.forbidden_opcodes {
287 self.report.add_forbidden_opcode_accessible_from_endpoint(
288 endpoint.clone(),
289 forbidden_opcode.clone(),
290 );
291 }
292 }
293 message.push_str(&format!(
294 " This function is accessible via call_indirect, from endpoints: {}.",
295 self.call_graph
296 .call_indirect_accessible_from_endpoints
297 .iter()
298 .cloned()
299 .collect::<Vec<String>>()
300 .join(", ")
301 ));
302 }
303
304 println!("{}", message.red().bold());
305 }
306 }
307
308 fn parse_export_section(&mut self, export_section: ExportSectionReader) {
309 if self.call_graph.endpoints.is_empty() {
310 return;
311 }
312
313 for export in export_section {
314 let export = export.expect("Failed to read export section");
315 if wasmparser::ExternalKind::Func == export.kind {
316 if let Some(endpoint) = self.call_graph.endpoints.get_mut(export.name) {
317 endpoint.set_index(export.index.try_into().unwrap());
318 }
319 }
320 }
321 }
322
323 fn parse_element_section(&mut self, element_section: ElementSectionReader) {
324 for t in element_section.into_iter() {
325 let element = t.expect("Failed to read table section");
326
327 if let ElementItems::Functions(functions) = element.items {
328 for func_result in functions {
329 let function_index =
330 func_result.expect("Failed to read function index in element section");
331 self.call_graph
332 .table_functions
333 .push(function_index as usize);
334 }
335 }
336 }
337 }
338}
339
340pub(crate) fn get_view_endpoints_indexes(endpoints: &HashMap<String, EndpointInfo>) -> Vec<usize> {
341 endpoints
342 .values()
343 .filter(|endpoint_info| endpoint_info.readonly)
344 .map(|endpoint_info| endpoint_info.index)
345 .collect()
346}
347
348pub(crate) fn get_view_endpoints(
349 endpoints: &HashMap<String, EndpointInfo>,
350) -> HashMap<&str, usize> {
351 let mut view_endpoints = HashMap::new();
352
353 for (name, endpoint_info) in endpoints {
354 if endpoint_info.readonly {
355 view_endpoints.insert(name.as_str(), endpoint_info.index);
356 }
357 }
358
359 view_endpoints
360}
361
362fn is_fail_allocator_triggered(data_section: DataSectionReader) -> bool {
363 for data_fragment in data_section.into_iter().flatten() {
364 if data_fragment
365 .data
366 .windows(ERROR_FAIL_ALLOCATOR.len())
367 .any(|data| data == ERROR_FAIL_ALLOCATOR)
368 {
369 println!(
370 "{}",
371 "FailAllocator used while memory allocation is accessible in code. Contract may fail unexpectedly when memory allocation is attempted"
372 .to_string()
373 .red()
374 .bold()
375 );
376 return true;
377 }
378 }
379
380 false
381}
382
383fn mark_write(wasm_info: &mut WasmInfo, func: usize, visited: &mut HashSet<usize>) {
384 if visited.contains(&func) {
386 return;
387 }
388
389 visited.insert(func);
390
391 let callees: Vec<usize> = if let Some(callees) = wasm_info.call_graph.function_map.get(&func) {
392 callees.called_function_indexes.iter().cloned().collect()
393 } else {
394 return;
395 };
396
397 for callee in callees {
398 if wasm_info.write_index_functions.contains(&callee) {
399 wasm_info.write_index_functions.insert(func);
400 } else {
401 mark_write(wasm_info, callee, visited);
402 if wasm_info.write_index_functions.contains(&callee) {
403 wasm_info.write_index_functions.insert(func);
404 }
405 }
406 }
407}
408
409fn is_ei_valid(imports: &[String], check_ei: Option<&EIVersion>) -> bool {
410 if let Some(ei) = check_ei {
411 let mut num_errors = 0;
412 for import in imports {
413 if !ei.contains_vm_hook(import.as_str()) {
414 num_errors += 1;
415 }
416 }
417
418 if num_errors == 0 {
419 return true;
420 }
421 }
422
423 false
424}
425
426fn is_mem_grow(code_section: &FunctionBody) -> bool {
427 let mut instructions_reader = code_section
428 .get_operators_reader()
429 .expect("Failed to get operators reader");
430
431 while let Ok(op) = instructions_reader.read() {
432 if let Operator::MemoryGrow { mem: _ } = op {
433 return true;
434 }
435 }
436
437 false
438}
439
440fn extract_opcode(op: Operator) -> String {
441 let op_str = format!("{:?}", op);
442 let op_vec: Vec<&str> = op_str.split_whitespace().collect();
443
444 op_vec[0].to_owned()
445}