gear_pwasm_utils/
optimizer.rs

1#[cfg(not(features = "std"))]
2use crate::std::collections::BTreeSet as Set;
3#[cfg(features = "std")]
4use crate::std::collections::HashSet as Set;
5use crate::std::{mem, vec::Vec};
6
7use crate::symbols::{expand_symbols, push_code_symbols, resolve_function, Symbol};
8use log::trace;
9use parity_wasm::elements;
10
11#[derive(Debug)]
12pub enum Error {
13	/// Since optimizer starts with export entries, export
14	///   section is supposed to exist.
15	NoExportSection,
16}
17
18pub fn optimize(
19	module: &mut elements::Module, // Module to optimize
20	used_exports: Vec<&str>,       // List of only exports that will be usable after optimization
21) -> Result<(), Error> {
22	// WebAssembly exports optimizer
23	// Motivation: emscripten compiler backend compiles in many unused exports
24	//   which in turn compile in unused imports and leaves unused functions
25
26	// try to parse name section
27	let module_temp = mem::take(module);
28	let module_temp = module_temp.parse_names().unwrap_or_else(|(_err, module)| module);
29	*module = module_temp;
30
31	// Algo starts from the top, listing all items that should stay
32	let mut stay = Set::new();
33	for (index, entry) in module
34		.export_section()
35		.ok_or(Error::NoExportSection)?
36		.entries()
37		.iter()
38		.enumerate()
39	{
40		if used_exports.iter().any(|e| *e == entry.field()) {
41			stay.insert(Symbol::Export(index));
42		}
43	}
44
45	// If there is start function in module, it should stary
46	module.start_section().map(|ss| stay.insert(resolve_function(module, ss)));
47
48	// All symbols used in data/element segments are also should be preserved
49	let mut init_symbols = Vec::new();
50	if let Some(data_section) = module.data_section() {
51		for segment in data_section.entries() {
52			push_code_symbols(
53				module,
54				segment
55					.offset()
56					.as_ref()
57					.expect("parity-wasm is compiled without bulk-memory operations")
58					.code(),
59				&mut init_symbols,
60			);
61		}
62	}
63	if let Some(elements_section) = module.elements_section() {
64		for segment in elements_section.entries() {
65			push_code_symbols(
66				module,
67				segment
68					.offset()
69					.as_ref()
70					.expect("parity-wasm is compiled without bulk-memory operations")
71					.code(),
72				&mut init_symbols,
73			);
74			for func_index in segment.members() {
75				stay.insert(resolve_function(module, *func_index));
76			}
77		}
78	}
79	for symbol in init_symbols.drain(..) {
80		stay.insert(symbol);
81	}
82
83	// Call function which will traverse the list recursively, filling stay with all symbols
84	// that are already used by those which already there
85	expand_symbols(module, &mut stay);
86
87	for symbol in stay.iter() {
88		trace!("symbol to stay: {:?}", symbol);
89	}
90
91	// Keep track of referreable symbols to rewire calls/globals
92	let mut eliminated_funcs = Vec::new();
93	let mut eliminated_globals = Vec::new();
94	let mut eliminated_types = Vec::new();
95
96	// First, iterate through types
97	let mut index = 0;
98	let mut old_index = 0;
99
100	{
101		loop {
102			if type_section(module).map(|section| section.types_mut().len()).unwrap_or(0) == index {
103				break
104			}
105
106			if stay.contains(&Symbol::Type(old_index)) {
107				index += 1;
108			} else {
109				type_section(module)
110					.expect("If type section does not exists, the loop will break at the beginning of first iteration")
111					.types_mut().remove(index);
112				eliminated_types.push(old_index);
113				trace!("Eliminated type({})", old_index);
114			}
115			old_index += 1;
116		}
117	}
118
119	// Second, iterate through imports
120	let mut top_funcs = 0;
121	let mut top_globals = 0;
122	index = 0;
123	old_index = 0;
124
125	if let Some(imports) = import_section(module) {
126		loop {
127			let mut remove = false;
128			match imports.entries()[index].external() {
129				elements::External::Function(_) => {
130					if stay.contains(&Symbol::Import(old_index)) {
131						index += 1;
132					} else {
133						remove = true;
134						eliminated_funcs.push(top_funcs);
135						trace!(
136							"Eliminated import({}) func({}, {})",
137							old_index,
138							top_funcs,
139							imports.entries()[index].field()
140						);
141					}
142					top_funcs += 1;
143				},
144				elements::External::Global(_) => {
145					if stay.contains(&Symbol::Import(old_index)) {
146						index += 1;
147					} else {
148						remove = true;
149						eliminated_globals.push(top_globals);
150						trace!(
151							"Eliminated import({}) global({}, {})",
152							old_index,
153							top_globals,
154							imports.entries()[index].field()
155						);
156					}
157					top_globals += 1;
158				},
159				_ => {
160					index += 1;
161				},
162			}
163			if remove {
164				imports.entries_mut().remove(index);
165			}
166
167			old_index += 1;
168
169			if index == imports.entries().len() {
170				break
171			}
172		}
173	}
174
175	// Third, iterate through globals
176	if let Some(globals) = global_section(module) {
177		index = 0;
178		old_index = 0;
179
180		loop {
181			if globals.entries_mut().len() == index {
182				break
183			}
184			if stay.contains(&Symbol::Global(old_index)) {
185				index += 1;
186			} else {
187				globals.entries_mut().remove(index);
188				eliminated_globals.push(top_globals + old_index);
189				trace!("Eliminated global({})", top_globals + old_index);
190			}
191			old_index += 1;
192		}
193	}
194
195	// Forth, delete orphaned functions
196	if function_section(module).is_some() && code_section(module).is_some() {
197		index = 0;
198		old_index = 0;
199
200		loop {
201			if function_section(module).expect("Functons section to exist").entries_mut().len() ==
202				index
203			{
204				break
205			}
206			if stay.contains(&Symbol::Function(old_index)) {
207				index += 1;
208			} else {
209				function_section(module)
210					.expect("Functons section to exist")
211					.entries_mut()
212					.remove(index);
213				code_section(module).expect("Code section to exist").bodies_mut().remove(index);
214
215				eliminated_funcs.push(top_funcs + old_index);
216				trace!("Eliminated function({})", top_funcs + old_index);
217			}
218			old_index += 1;
219		}
220	}
221
222	// Fifth, eliminate unused exports
223	{
224		let exports = export_section(module).ok_or(Error::NoExportSection)?;
225
226		index = 0;
227		old_index = 0;
228
229		loop {
230			if exports.entries_mut().len() == index {
231				break
232			}
233			if stay.contains(&Symbol::Export(old_index)) {
234				index += 1;
235			} else {
236				trace!(
237					"Eliminated export({}, {})",
238					old_index,
239					exports.entries_mut()[index].field()
240				);
241				exports.entries_mut().remove(index);
242			}
243			old_index += 1;
244		}
245	}
246
247	if !eliminated_globals.is_empty() ||
248		!eliminated_funcs.is_empty() ||
249		!eliminated_types.is_empty()
250	{
251		// Finaly, rewire all calls, globals references and types to the new indices
252		//   (only if there is anything to do)
253		// When sorting primitives sorting unstable is faster without any difference in result.
254		eliminated_globals.sort_unstable();
255		eliminated_funcs.sort_unstable();
256		eliminated_types.sort_unstable();
257
258		for section in module.sections_mut() {
259			match section {
260				elements::Section::Start(func_index) if !eliminated_funcs.is_empty() => {
261					let totalle =
262						eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
263					*func_index -= totalle as u32;
264				},
265				elements::Section::Function(function_section) if !eliminated_types.is_empty() =>
266					for func_signature in function_section.entries_mut() {
267						let totalle = eliminated_types
268							.iter()
269							.take_while(|i| (**i as u32) < func_signature.type_ref())
270							.count();
271						*func_signature.type_ref_mut() -= totalle as u32;
272					},
273				elements::Section::Import(import_section) if !eliminated_types.is_empty() => {
274					for import_entry in import_section.entries_mut() {
275						if let elements::External::Function(type_ref) = import_entry.external_mut()
276						{
277							let totalle = eliminated_types
278								.iter()
279								.take_while(|i| (**i as u32) < *type_ref)
280								.count();
281							*type_ref -= totalle as u32;
282						}
283					}
284				},
285				elements::Section::Code(code_section)
286					if !eliminated_globals.is_empty() || !eliminated_funcs.is_empty() =>
287				{
288					for func_body in code_section.bodies_mut() {
289						if !eliminated_funcs.is_empty() {
290							update_call_index(func_body.code_mut(), &eliminated_funcs);
291						}
292						if !eliminated_globals.is_empty() {
293							update_global_index(
294								func_body.code_mut().elements_mut(),
295								&eliminated_globals,
296							)
297						}
298						if !eliminated_types.is_empty() {
299							update_type_index(func_body.code_mut(), &eliminated_types)
300						}
301					}
302				},
303				elements::Section::Export(export_section) => {
304					for export in export_section.entries_mut() {
305						match export.internal_mut() {
306							elements::Internal::Function(func_index) => {
307								let totalle = eliminated_funcs
308									.iter()
309									.take_while(|i| (**i as u32) < *func_index)
310									.count();
311								*func_index -= totalle as u32;
312							},
313							elements::Internal::Global(global_index) => {
314								let totalle = eliminated_globals
315									.iter()
316									.take_while(|i| (**i as u32) < *global_index)
317									.count();
318								*global_index -= totalle as u32;
319							},
320							_ => {},
321						}
322					}
323				},
324				elements::Section::Global(global_section) => {
325					for global_entry in global_section.entries_mut() {
326						update_global_index(
327							global_entry.init_expr_mut().code_mut(),
328							&eliminated_globals,
329						)
330					}
331				},
332				elements::Section::Data(data_section) =>
333					for segment in data_section.entries_mut() {
334						update_global_index(
335							segment
336								.offset_mut()
337								.as_mut()
338								.expect("parity-wasm is compiled without bulk-memory operations")
339								.code_mut(),
340							&eliminated_globals,
341						)
342					},
343				elements::Section::Element(elements_section) => {
344					for segment in elements_section.entries_mut() {
345						update_global_index(
346							segment
347								.offset_mut()
348								.as_mut()
349								.expect("parity-wasm is compiled without bulk-memory operations")
350								.code_mut(),
351							&eliminated_globals,
352						);
353						// update all indirect call addresses initial values
354						for func_index in segment.members_mut() {
355							let totalle = eliminated_funcs
356								.iter()
357								.take_while(|i| (**i as u32) < *func_index)
358								.count();
359							*func_index -= totalle as u32;
360						}
361					}
362				},
363				elements::Section::Name(name_section) => {
364					if let Some(func_name) = name_section.functions_mut() {
365						let mut func_name_map = mem::take(func_name.names_mut());
366						for index in &eliminated_funcs {
367							func_name_map.remove(*index as u32);
368						}
369						let updated_map = func_name_map
370							.into_iter()
371							.map(|(index, value)| {
372								let totalle = eliminated_funcs
373									.iter()
374									.take_while(|i| (**i as u32) < index)
375									.count() as u32;
376								(index - totalle, value)
377							})
378							.collect();
379						*func_name.names_mut() = updated_map;
380					}
381
382					if let Some(local_name) = name_section.locals_mut() {
383						let mut local_names_map = mem::take(local_name.local_names_mut());
384						for index in &eliminated_funcs {
385							local_names_map.remove(*index as u32);
386						}
387						let updated_map = local_names_map
388							.into_iter()
389							.map(|(index, value)| {
390								let totalle = eliminated_funcs
391									.iter()
392									.take_while(|i| (**i as u32) < index)
393									.count() as u32;
394								(index - totalle, value)
395							})
396							.collect();
397						*local_name.local_names_mut() = updated_map;
398					}
399				},
400				_ => {},
401			}
402		}
403	}
404
405	// Also drop all custom sections
406	module
407		.sections_mut()
408		.retain(|section| !matches!(section, elements::Section::Custom(_)));
409
410	Ok(())
411}
412
413pub fn update_call_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
414	use parity_wasm::elements::Instruction::*;
415	for instruction in instructions.elements_mut().iter_mut() {
416		if let Call(call_index) = instruction {
417			let totalle =
418				eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
419			trace!("rewired call {} -> call {}", *call_index, *call_index - totalle as u32);
420			*call_index -= totalle as u32;
421		}
422	}
423}
424
425/// Updates global references considering the _ordered_ list of eliminated indices
426pub fn update_global_index(
427	instructions: &mut Vec<elements::Instruction>,
428	eliminated_indices: &[usize],
429) {
430	use parity_wasm::elements::Instruction::*;
431	for instruction in instructions.iter_mut() {
432		match instruction {
433			GetGlobal(index) | SetGlobal(index) => {
434				let totalle =
435					eliminated_indices.iter().take_while(|i| (**i as u32) < *index).count();
436				trace!("rewired global {} -> global {}", *index, *index - totalle as u32);
437				*index -= totalle as u32;
438			},
439			_ => {},
440		}
441	}
442}
443
444/// Updates global references considering the _ordered_ list of eliminated indices
445pub fn update_type_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
446	use parity_wasm::elements::Instruction::*;
447	for instruction in instructions.elements_mut().iter_mut() {
448		if let CallIndirect(call_index, _) = instruction {
449			let totalle =
450				eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
451			trace!(
452				"rewired call_indrect {} -> call_indirect {}",
453				*call_index,
454				*call_index - totalle as u32
455			);
456			*call_index -= totalle as u32;
457		}
458	}
459}
460
461pub fn import_section(module: &mut elements::Module) -> Option<&mut elements::ImportSection> {
462	for section in module.sections_mut() {
463		if let elements::Section::Import(sect) = section {
464			return Some(sect)
465		}
466	}
467	None
468}
469
470pub fn global_section(module: &mut elements::Module) -> Option<&mut elements::GlobalSection> {
471	for section in module.sections_mut() {
472		if let elements::Section::Global(sect) = section {
473			return Some(sect)
474		}
475	}
476	None
477}
478
479pub fn function_section(module: &mut elements::Module) -> Option<&mut elements::FunctionSection> {
480	for section in module.sections_mut() {
481		if let elements::Section::Function(sect) = section {
482			return Some(sect)
483		}
484	}
485	None
486}
487
488pub fn code_section(module: &mut elements::Module) -> Option<&mut elements::CodeSection> {
489	for section in module.sections_mut() {
490		if let elements::Section::Code(sect) = section {
491			return Some(sect)
492		}
493	}
494	None
495}
496
497pub fn export_section(module: &mut elements::Module) -> Option<&mut elements::ExportSection> {
498	for section in module.sections_mut() {
499		if let elements::Section::Export(sect) = section {
500			return Some(sect)
501		}
502	}
503	None
504}
505
506pub fn type_section(module: &mut elements::Module) -> Option<&mut elements::TypeSection> {
507	for section in module.sections_mut() {
508		if let elements::Section::Type(sect) = section {
509			return Some(sect)
510		}
511	}
512	None
513}
514
515#[cfg(test)]
516mod tests {
517
518	use super::*;
519	use parity_wasm::{builder, elements};
520
521	/// @spec 0
522	/// Optimizer presumes that export section exists and contains
523	/// all symbols passed as a second parameter. Since empty module
524	/// obviously contains no export section, optimizer should return
525	/// error on it.
526	#[test]
527	fn empty() {
528		let mut module = builder::module().build();
529		let result = optimize(&mut module, vec!["_call"]);
530
531		assert!(result.is_err());
532	}
533
534	/// @spec 1
535	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
536	/// and exports both of them in the export section. During optimization, the `_random`
537	/// function should vanish completely, given we pass `_call` as the only function to stay
538	/// in the module.
539	#[test]
540	fn minimal() {
541		let mut module = builder::module()
542			.function()
543			.signature()
544			.param()
545			.i32()
546			.build()
547			.build()
548			.function()
549			.signature()
550			.param()
551			.i32()
552			.param()
553			.i32()
554			.build()
555			.build()
556			.export()
557			.field("_call")
558			.internal()
559			.func(0)
560			.build()
561			.export()
562			.field("_random")
563			.internal()
564			.func(1)
565			.build()
566			.build();
567		assert_eq!(
568			module.export_section().expect("export section to be generated").entries().len(),
569			2
570		);
571
572		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
573
574		assert_eq!(
575			1,
576			module.export_section().expect("export section to be generated").entries().len(),
577			"There should only 1 (one) export entry in the optimized module"
578		);
579
580		assert_eq!(
581			1,
582			module
583				.function_section()
584				.expect("functions section to be generated")
585				.entries()
586				.len(),
587			"There should 2 (two) functions in the optimized module"
588		);
589	}
590
591	/// @spec 2
592	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
593	/// to stay during the optimization. The code of this function uses global during the execution.
594	/// This sayed global should survive the optimization.
595	#[test]
596	fn globals() {
597		let mut module = builder::module()
598			.global()
599			.value_type()
600			.i32()
601			.build()
602			.function()
603			.signature()
604			.param()
605			.i32()
606			.build()
607			.body()
608			.with_instructions(elements::Instructions::new(vec![
609				elements::Instruction::GetGlobal(0),
610				elements::Instruction::End,
611			]))
612			.build()
613			.build()
614			.export()
615			.field("_call")
616			.internal()
617			.func(0)
618			.build()
619			.build();
620
621		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
622
623		assert_eq!(
624			1,
625			module.global_section().expect("global section to be generated").entries().len(),
626			"There should 1 (one) global entry in the optimized module, since _call function uses it"
627		);
628	}
629
630	/// @spec 2
631	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
632	/// to stay during the optimization. The code of this function uses one global during the execution,
633	/// but we have a bunch of other unused globals in the code. Last globals should not survive the optimization,
634	/// while the former should.
635	#[test]
636	fn globals_2() {
637		let mut module = builder::module()
638			.global()
639			.value_type()
640			.i32()
641			.build()
642			.global()
643			.value_type()
644			.i64()
645			.build()
646			.global()
647			.value_type()
648			.f32()
649			.build()
650			.function()
651			.signature()
652			.param()
653			.i32()
654			.build()
655			.body()
656			.with_instructions(elements::Instructions::new(vec![
657				elements::Instruction::GetGlobal(1),
658				elements::Instruction::End,
659			]))
660			.build()
661			.build()
662			.export()
663			.field("_call")
664			.internal()
665			.func(0)
666			.build()
667			.build();
668
669		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
670
671		assert_eq!(
672			1,
673			module.global_section().expect("global section to be generated").entries().len(),
674			"There should 1 (one) global entry in the optimized module, since _call function uses only one"
675		);
676	}
677
678	/// @spec 3
679	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
680	/// and exports both of them in the export section. Function `_call` also calls `_random`
681	/// in its function body. The optimization should kick `_random` function from the export section
682	/// but preserve it's body.
683	#[test]
684	fn call_ref() {
685		let mut module = builder::module()
686			.function()
687			.signature()
688			.param()
689			.i32()
690			.build()
691			.body()
692			.with_instructions(elements::Instructions::new(vec![
693				elements::Instruction::Call(1),
694				elements::Instruction::End,
695			]))
696			.build()
697			.build()
698			.function()
699			.signature()
700			.param()
701			.i32()
702			.param()
703			.i32()
704			.build()
705			.build()
706			.export()
707			.field("_call")
708			.internal()
709			.func(0)
710			.build()
711			.export()
712			.field("_random")
713			.internal()
714			.func(1)
715			.build()
716			.build();
717		assert_eq!(
718			module.export_section().expect("export section to be generated").entries().len(),
719			2
720		);
721
722		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
723
724		assert_eq!(
725			1,
726			module.export_section().expect("export section to be generated").entries().len(),
727			"There should only 1 (one) export entry in the optimized module"
728		);
729
730		assert_eq!(
731			2,
732			module
733				.function_section()
734				.expect("functions section to be generated")
735				.entries()
736				.len(),
737			"There should 2 (two) functions in the optimized module"
738		);
739	}
740
741	/// @spec 4
742	/// Imagine the unoptimized module has an indirect call to function of type 1
743	/// The type should persist so that indirect call would work
744	#[test]
745	fn call_indirect() {
746		let mut module = builder::module()
747			.function()
748			.signature()
749			.param()
750			.i32()
751			.param()
752			.i32()
753			.build()
754			.build()
755			.function()
756			.signature()
757			.param()
758			.i32()
759			.param()
760			.i32()
761			.param()
762			.i32()
763			.build()
764			.build()
765			.function()
766			.signature()
767			.param()
768			.i32()
769			.build()
770			.body()
771			.with_instructions(elements::Instructions::new(vec![
772				elements::Instruction::CallIndirect(1, 0),
773				elements::Instruction::End,
774			]))
775			.build()
776			.build()
777			.export()
778			.field("_call")
779			.internal()
780			.func(2)
781			.build()
782			.build();
783
784		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
785
786		assert_eq!(
787			2,
788			module.type_section().expect("type section to be generated").types().len(),
789			"There should 2 (two) types left in the module, 1 for indirect call and one for _call"
790		);
791
792		let indirect_opcode =
793			&module.code_section().expect("code section to be generated").bodies()[0]
794				.code()
795				.elements()[0];
796		match *indirect_opcode {
797			elements::Instruction::CallIndirect(0, 0) => {},
798			_ => {
799				panic!(
800					"Expected call_indirect to use index 0 after optimization, since previois 0th was eliminated, but got {:?}",
801					indirect_opcode
802				);
803			},
804		}
805	}
806}