twasm_utils/
optimizer.rs

1#[cfg(features = "std")]
2use crate::std::collections::{HashSet as Set};
3#[cfg(not(features = "std"))]
4use crate::std::collections::{BTreeSet as Set};
5use crate::std::vec::Vec;
6use crate::std::mem;
7
8use tetsy_wasm::elements;
9
10use crate::symbols::{Symbol, expand_symbols, push_code_symbols, resolve_function};
11
12#[derive(Debug)]
13pub enum Error {
14	/// Since optimizer starts with export entries, export
15	///   section is supposed to exist.
16	NoExportSection,
17}
18
19pub fn optimize(
20	module: &mut elements::Module, // Module to optimize
21	used_exports: Vec<&str>,       // List of only exports that will be usable after optimization
22) -> Result<(), Error> {
23	// WebAssembly exports optimizer
24	// Motivation: emscripten compiler backend compiles in many unused exports
25	//   which in turn compile in unused imports and leaves unused functions
26
27	// try to parse name section
28	let module_temp = mem::take(module);
29	let module_temp = module_temp
30		.parse_names()
31		.unwrap_or_else(|(_err, module)| module);
32	*module = module_temp;
33
34	// Algo starts from the top, listing all items that should stay
35	let mut stay = Set::new();
36	for (index, entry) in module.export_section().ok_or(Error::NoExportSection)?.entries().iter().enumerate() {
37		if used_exports.iter().any(|e| *e == entry.field()) {
38			stay.insert(Symbol::Export(index));
39		}
40	}
41
42	// If there is start function in module, it should stary
43	module.start_section().map(|ss| stay.insert(resolve_function(&module, ss)));
44
45	// All symbols used in data/element segments are also should be preserved
46	let mut init_symbols = Vec::new();
47	if let Some(data_section) = module.data_section() {
48		for segment in data_section.entries() {
49			push_code_symbols(
50				&module,
51				segment
52					.offset()
53					.as_ref()
54					.expect("tetsy-wasm is compiled without bulk-memory operations")
55					.code(),
56				&mut init_symbols,
57			);
58		}
59	}
60	if let Some(elements_section) = module.elements_section() {
61		for segment in elements_section.entries() {
62			push_code_symbols(
63				&module,
64				segment
65					.offset()
66					.as_ref()
67					.expect("tetsy-wasm is compiled without bulk-memory operations")
68					.code(),
69				&mut init_symbols
70			);
71			for func_index in segment.members() {
72				stay.insert(resolve_function(&module, *func_index));
73			}
74		}
75	}
76	for symbol in init_symbols.drain(..) { stay.insert(symbol); }
77
78	// Call function which will traverse the list recursively, filling stay with all symbols
79	// that are already used by those which already there
80	expand_symbols(module, &mut stay);
81
82	for symbol in stay.iter() {
83		trace!("symbol to stay: {:?}", symbol);
84	}
85
86	// Keep track of referreable symbols to rewire calls/globals
87	let mut eliminated_funcs = Vec::new();
88	let mut eliminated_globals = Vec::new();
89	let mut eliminated_types = Vec::new();
90
91	// First, iterate through types
92	let mut index = 0;
93	let mut old_index = 0;
94
95	{
96		loop {
97			if type_section(module).map(|section| section.types_mut().len()).unwrap_or(0) == index { break; }
98
99			if stay.contains(&Symbol::Type(old_index)) {
100				index += 1;
101			} else {
102				type_section(module)
103					.expect("If type section does not exists, the loop will break at the beginning of first iteration")
104					.types_mut().remove(index);
105				eliminated_types.push(old_index);
106				trace!("Eliminated type({})", old_index);
107			}
108			old_index += 1;
109		}
110	}
111
112	// Second, iterate through imports
113	let mut top_funcs = 0;
114	let mut top_globals = 0;
115	index = 0;
116	old_index = 0;
117
118	if let Some(imports) = import_section(module) {
119		loop {
120			let mut remove = false;
121			match imports.entries()[index].external() {
122				elements::External::Function(_) => {
123					if stay.contains(&Symbol::Import(old_index)) {
124						index += 1;
125					} else {
126						remove = true;
127						eliminated_funcs.push(top_funcs);
128						trace!("Eliminated import({}) func({}, {})", old_index, top_funcs, imports.entries()[index].field());
129					}
130					top_funcs += 1;
131				},
132				elements::External::Global(_) => {
133					if stay.contains(&Symbol::Import(old_index)) {
134						index += 1;
135					} else {
136						remove = true;
137						eliminated_globals.push(top_globals);
138						trace!("Eliminated import({}) global({}, {})", old_index, top_globals, imports.entries()[index].field());
139					}
140					top_globals += 1;
141				},
142				_ => {
143					index += 1;
144				}
145			}
146			if remove {
147				imports.entries_mut().remove(index);
148			}
149
150			old_index += 1;
151
152			if index == imports.entries().len() { break; }
153		}
154	}
155
156	// Third, iterate through globals
157	if let Some(globals) = global_section(module) {
158		index = 0;
159		old_index = 0;
160
161		loop {
162			if globals.entries_mut().len() == index { break; }
163			if stay.contains(&Symbol::Global(old_index)) {
164				index += 1;
165			} else {
166				globals.entries_mut().remove(index);
167				eliminated_globals.push(top_globals + old_index);
168				trace!("Eliminated global({})", top_globals + old_index);
169			}
170			old_index += 1;
171		}
172	}
173
174	// Forth, delete orphaned functions
175	if function_section(module).is_some() && code_section(module).is_some() {
176		index = 0;
177		old_index = 0;
178
179		loop {
180			if function_section(module).expect("Functons section to exist").entries_mut().len() == index { break; }
181			if stay.contains(&Symbol::Function(old_index)) {
182				index += 1;
183			} else {
184				function_section(module).expect("Functons section to exist").entries_mut().remove(index);
185				code_section(module).expect("Code section to exist").bodies_mut().remove(index);
186
187				eliminated_funcs.push(top_funcs + old_index);
188				trace!("Eliminated function({})", top_funcs + old_index);
189			}
190			old_index += 1;
191		}
192	}
193
194	// Fifth, eliminate unused exports
195	{
196		let exports = export_section(module).ok_or(Error::NoExportSection)?;
197
198		index = 0;
199		old_index = 0;
200
201		loop {
202			if exports.entries_mut().len() == index { break; }
203			if stay.contains(&Symbol::Export(old_index)) {
204				index += 1;
205			} else {
206				trace!("Eliminated export({}, {})", old_index, exports.entries_mut()[index].field());
207				exports.entries_mut().remove(index);
208			}
209			old_index += 1;
210		}
211	}
212
213	if !eliminated_globals.is_empty() || !eliminated_funcs.is_empty() || !eliminated_types.is_empty() {
214		// Finaly, rewire all calls, globals references and types to the new indices
215		//   (only if there is anything to do)
216		// When sorting primitives sorting unstable is faster without any difference in result.
217		eliminated_globals.sort_unstable();
218		eliminated_funcs.sort_unstable();
219		eliminated_types.sort_unstable();
220
221		for section in module.sections_mut() {
222			match section {
223				elements::Section::Start(func_index) if !eliminated_funcs.is_empty() => {
224					let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
225					*func_index -= totalle as u32;
226				},
227				elements::Section::Function(function_section) if !eliminated_types.is_empty() => {
228					for func_signature in function_section.entries_mut() {
229						let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < func_signature.type_ref()).count();
230						*func_signature.type_ref_mut() -= totalle as u32;
231					}
232				},
233				elements::Section::Import(import_section) if !eliminated_types.is_empty() => {
234					for import_entry in import_section.entries_mut() {
235						if let elements::External::Function(type_ref) = import_entry.external_mut() {
236							let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < *type_ref).count();
237							*type_ref -= totalle as u32;
238						}
239					}
240				},
241				elements::Section::Code(code_section) if !eliminated_globals.is_empty() || !eliminated_funcs.is_empty() => {
242					for func_body in code_section.bodies_mut() {
243						if !eliminated_funcs.is_empty() {
244							update_call_index(func_body.code_mut(), &eliminated_funcs);
245						}
246						if !eliminated_globals.is_empty() {
247							update_global_index(func_body.code_mut().elements_mut(), &eliminated_globals)
248						}
249						if !eliminated_types.is_empty() {
250							update_type_index(func_body.code_mut(), &eliminated_types)
251						}
252					}
253				},
254				elements::Section::Export(export_section) => {
255					for export in export_section.entries_mut() {
256						match export.internal_mut() {
257							elements::Internal::Function(func_index) => {
258								let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
259								*func_index -= totalle as u32;
260							},
261							elements::Internal::Global(global_index) => {
262								let totalle = eliminated_globals.iter().take_while(|i| (**i as u32) < *global_index).count();
263								*global_index -= totalle as u32;
264							},
265							_ => {}
266						}
267					}
268				},
269				elements::Section::Global(global_section) => {
270					for global_entry in global_section.entries_mut() {
271						update_global_index(global_entry.init_expr_mut().code_mut(), &eliminated_globals)
272					}
273				},
274				elements::Section::Data(data_section) => {
275					for segment in data_section.entries_mut() {
276						update_global_index(
277							segment
278								.offset_mut()
279								.as_mut()
280								.expect("tetsy-wasm is compiled without bulk-memory operations")
281								.code_mut(),
282							&eliminated_globals,
283						)
284					}
285				},
286				elements::Section::Element(elements_section) => {
287					for segment in elements_section.entries_mut() {
288						update_global_index(
289							segment
290							.offset_mut()
291							.as_mut()
292							.expect("tetsy-wasm is compiled without bulk-memory operations")
293							.code_mut(),
294							&eliminated_globals
295						);
296						// update all indirect call addresses initial values
297						for func_index in segment.members_mut() {
298							let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
299							*func_index -= totalle as u32;
300						}
301					}
302				},
303				elements::Section::Name(name_section) => {
304					if let Some(func_name) = name_section.functions_mut() {
305						let mut func_name_map = mem::take(func_name.names_mut());
306						for index in &eliminated_funcs {
307							func_name_map.remove(*index as u32);
308						}
309						let updated_map = func_name_map.into_iter().map(|(index, value)| {
310							let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < index).count() as u32;
311							(index - totalle, value)
312						}).collect();
313						*func_name.names_mut() = updated_map;
314					}
315
316					if let Some(local_name) = name_section.locals_mut() {
317						let mut local_names_map = mem::take(local_name.local_names_mut());
318						for index in &eliminated_funcs {
319							local_names_map.remove(*index as u32);
320						}
321						let updated_map = local_names_map.into_iter().map(|(index, value)| {
322							let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < index).count() as u32;
323							(index - totalle, value)
324						}).collect();
325						*local_name.local_names_mut() = updated_map;
326					}
327				}
328				_ => { }
329			}
330		}
331	}
332
333	Ok(())
334}
335
336
337pub fn update_call_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
338	use tetsy_wasm::elements::Instruction::*;
339	for instruction in instructions.elements_mut().iter_mut() {
340		if let Call(call_index) = instruction {
341			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
342			trace!("rewired call {} -> call {}", *call_index, *call_index - totalle as u32);
343			*call_index -= totalle as u32;
344		}
345	}
346}
347
348/// Updates global references considering the _ordered_ list of eliminated indices
349pub fn update_global_index(instructions: &mut Vec<elements::Instruction>, eliminated_indices: &[usize]) {
350	use tetsy_wasm::elements::Instruction::*;
351	for instruction in instructions.iter_mut() {
352		match instruction {
353			GetGlobal(index) | SetGlobal(index) => {
354				let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *index).count();
355				trace!("rewired global {} -> global {}", *index, *index - totalle as u32);
356				*index -= totalle as u32;
357			},
358			_ => { },
359		}
360	}
361}
362
363/// Updates global references considering the _ordered_ list of eliminated indices
364pub fn update_type_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
365	use tetsy_wasm::elements::Instruction::*;
366	for instruction in instructions.elements_mut().iter_mut() {
367		if let CallIndirect(call_index, _) = instruction {
368			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
369			trace!("rewired call_indrect {} -> call_indirect {}", *call_index, *call_index - totalle as u32);
370			*call_index -= totalle as u32;
371		}
372	}
373}
374
375pub fn import_section(module: &mut elements::Module) -> Option<&mut elements::ImportSection> {
376   for section in module.sections_mut() {
377		if let elements::Section::Import(sect) = section {
378			return Some(sect);
379		}
380	}
381	None
382}
383
384pub fn global_section(module: &mut elements::Module) -> Option<&mut elements::GlobalSection> {
385   for section in module.sections_mut() {
386		if let elements::Section::Global(sect) = section {
387			return Some(sect);
388		}
389	}
390	None
391}
392
393pub fn function_section(module: &mut elements::Module) -> Option<&mut elements::FunctionSection> {
394   for section in module.sections_mut() {
395		if let elements::Section::Function(sect) = section {
396			return Some(sect);
397		}
398	}
399	None
400}
401
402pub fn code_section(module: &mut elements::Module) -> Option<&mut elements::CodeSection> {
403   for section in module.sections_mut() {
404		if let elements::Section::Code(sect) = section {
405			return Some(sect);
406		}
407	}
408	None
409}
410
411pub fn export_section(module: &mut elements::Module) -> Option<&mut elements::ExportSection> {
412   for section in module.sections_mut() {
413		if let elements::Section::Export(sect) = section {
414			return Some(sect);
415		}
416	}
417	None
418}
419
420pub fn type_section(module: &mut elements::Module) -> Option<&mut elements::TypeSection> {
421   for section in module.sections_mut() {
422		if let elements::Section::Type(sect) = section {
423			return Some(sect);
424		}
425	}
426	None
427}
428
429#[cfg(test)]
430mod tests {
431
432	use tetsy_wasm::{builder, elements};
433	use super::*;
434
435	/// @spec 0
436	/// Optimizer presumes that export section exists and contains
437	/// all symbols passed as a second parameter. Since empty module
438	/// obviously contains no export section, optimizer should return
439	/// error on it.
440	#[test]
441	fn empty() {
442		let mut module = builder::module().build();
443		let result = optimize(&mut module, vec!["_call"]);
444
445		assert!(result.is_err());
446	}
447
448	/// @spec 1
449	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
450	/// and exports both of them in the export section. During optimization, the `_random`
451	/// function should vanish completely, given we pass `_call` as the only function to stay
452	/// in the module.
453	#[test]
454	fn minimal() {
455		let mut module = builder::module()
456			.function()
457				.signature().param().i32().build()
458				.build()
459			.function()
460				.signature()
461					.param().i32()
462					.param().i32()
463					.build()
464				.build()
465			.export()
466				.field("_call")
467				.internal().func(0).build()
468			.export()
469				.field("_random")
470				.internal().func(1).build()
471			.build();
472		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
473
474		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
475
476		assert_eq!(
477			1,
478			module.export_section().expect("export section to be generated").entries().len(),
479			"There should only 1 (one) export entry in the optimized module"
480		);
481
482		assert_eq!(
483			1,
484			module.function_section().expect("functions section to be generated").entries().len(),
485			"There should 2 (two) functions in the optimized module"
486		);
487	}
488
489	/// @spec 2
490	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
491	/// to stay during the optimization. The code of this function uses global during the execution.
492	/// This sayed global should survive the optimization.
493	#[test]
494	fn globals() {
495		let mut module = builder::module()
496			.global()
497				.value_type().i32()
498				.build()
499			.function()
500				.signature().param().i32().build()
501				.body()
502					.with_instructions(elements::Instructions::new(
503						vec![
504							elements::Instruction::GetGlobal(0),
505							elements::Instruction::End
506						]
507					))
508					.build()
509				.build()
510			.export()
511				.field("_call")
512				.internal().func(0).build()
513			.build();
514
515		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
516
517		assert_eq!(
518			1,
519			module.global_section().expect("global section to be generated").entries().len(),
520			"There should 1 (one) global entry in the optimized module, since _call function uses it"
521		);
522	}
523
524	/// @spec 2
525	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
526	/// to stay during the optimization. The code of this function uses one global during the execution,
527	/// but we have a bunch of other unused globals in the code. Last globals should not survive the optimization,
528	/// while the former should.
529	#[test]
530	fn globals_2() {
531		let mut module = builder::module()
532			.global()
533				.value_type().i32()
534				.build()
535			.global()
536				.value_type().i64()
537				.build()
538			.global()
539				.value_type().f32()
540				.build()
541			.function()
542				.signature().param().i32().build()
543				.body()
544					.with_instructions(elements::Instructions::new(
545						vec![
546							elements::Instruction::GetGlobal(1),
547							elements::Instruction::End
548						]
549					))
550					.build()
551				.build()
552			.export()
553				.field("_call")
554				.internal().func(0).build()
555			.build();
556
557		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
558
559		assert_eq!(
560			1,
561			module.global_section().expect("global section to be generated").entries().len(),
562			"There should 1 (one) global entry in the optimized module, since _call function uses only one"
563		);
564	}
565
566	/// @spec 3
567	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
568	/// and exports both of them in the export section. Function `_call` also calls `_random`
569	/// in its function body. The optimization should kick `_random` function from the export section
570	/// but preserve it's body.
571	#[test]
572	fn call_ref() {
573		let mut module = builder::module()
574			.function()
575				.signature().param().i32().build()
576				.body()
577					.with_instructions(elements::Instructions::new(
578						vec![
579							elements::Instruction::Call(1),
580							elements::Instruction::End
581						]
582					))
583					.build()
584				.build()
585			.function()
586				.signature()
587					.param().i32()
588					.param().i32()
589					.build()
590				.build()
591			.export()
592				.field("_call")
593				.internal().func(0).build()
594			.export()
595				.field("_random")
596				.internal().func(1).build()
597			.build();
598		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
599
600		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
601
602		assert_eq!(
603			1,
604			module.export_section().expect("export section to be generated").entries().len(),
605			"There should only 1 (one) export entry in the optimized module"
606		);
607
608		assert_eq!(
609			2,
610			module.function_section().expect("functions section to be generated").entries().len(),
611			"There should 2 (two) functions in the optimized module"
612		);
613	}
614
615	/// @spec 4
616	/// Imagine the unoptimized module has an indirect call to function of type 1
617	/// The type should persist so that indirect call would work
618	#[test]
619	fn call_indirect() {
620		let mut module = builder::module()
621			.function()
622				.signature().param().i32().param().i32().build()
623				.build()
624			.function()
625				.signature().param().i32().param().i32().param().i32().build()
626				.build()
627			.function()
628				.signature().param().i32().build()
629				.body()
630					.with_instructions(elements::Instructions::new(
631						vec![
632							elements::Instruction::CallIndirect(1, 0),
633							elements::Instruction::End
634						]
635					))
636					.build()
637				.build()
638			.export()
639				.field("_call")
640				.internal().func(2).build()
641			.build();
642
643		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
644
645		assert_eq!(
646			2,
647			module.type_section().expect("type section to be generated").types().len(),
648			"There should 2 (two) types left in the module, 1 for indirect call and one for _call"
649		);
650
651		let indirect_opcode = &module.code_section().expect("code section to be generated").bodies()[0].code().elements()[0];
652		match *indirect_opcode {
653			elements::Instruction::CallIndirect(0, 0) => {},
654			_ => {
655				panic!(
656					"Expected call_indirect to use index 0 after optimization, since previois 0th was eliminated, but got {:?}",
657					indirect_opcode
658				);
659			}
660		}
661	}
662
663}