xwasm_utils/
optimizer.rs

1#[cfg(features = "std")]
2use std::collections::{HashSet as Set};
3#[cfg(not(features = "std"))]
4use std::collections::{BTreeSet as Set};
5use std::vec::Vec;
6use std::mem;
7
8use xwasm::elements;
9
10use symbols::{Symbol, expand_symbols, push_code_symbols, resolve_function};
11
12#[derive(Debug)]
13pub enum Error {
14	/// Since optimizer starts with export entries, export
15	///   section is supposed to exist.
16	NoExportSection,
17}
18
19pub fn optimize(
20	module: &mut elements::Module, // Module to optimize
21	used_exports: Vec<&str>,       // List of only exports that will be usable after optimization
22) -> Result<(), Error> {
23	// WebAssembly exports optimizer
24	// Motivation: emscripten compiler backend compiles in many unused exports
25	//   which in turn compile in unused imports and leaves unused functions
26
27	// try to parse name section
28	let module_temp = mem::replace(module, elements::Module::default());
29	let module_temp = module_temp
30		.parse_names()
31		.unwrap_or_else(|(_err, module)| module);
32	mem::replace(module, module_temp);
33
34	// Algo starts from the top, listing all items that should stay
35	let mut stay = Set::new();
36	for (index, entry) in module.export_section().ok_or(Error::NoExportSection)?.entries().iter().enumerate() {
37		if used_exports.iter().find(|e| **e == entry.field()).is_some() {
38			stay.insert(Symbol::Export(index));
39		}
40	}
41
42	// If there is start function in module, it should stary
43	module.start_section().map(|ss| stay.insert(resolve_function(&module, ss)));
44
45	// All symbols used in data/element segments are also should be preserved
46	let mut init_symbols = Vec::new();
47	if let Some(data_section) = module.data_section() {
48		for segment in data_section.entries() {
49			push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
50		}
51	}
52	if let Some(elements_section) = module.elements_section() {
53		for segment in elements_section.entries() {
54			push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
55			for func_index in segment.members() {
56				stay.insert(resolve_function(&module, *func_index));
57			}
58		}
59	}
60	for symbol in init_symbols.drain(..) { stay.insert(symbol); }
61
62	// Call function which will traverse the list recursively, filling stay with all symbols
63	// that are already used by those which already there
64	expand_symbols(module, &mut stay);
65
66	for symbol in stay.iter() {
67		trace!("symbol to stay: {:?}", symbol);
68	}
69
70	// Keep track of referreable symbols to rewire calls/globals
71	let mut eliminated_funcs = Vec::new();
72	let mut eliminated_globals = Vec::new();
73	let mut eliminated_types = Vec::new();
74
75	// First, iterate through types
76	let mut index = 0;
77	let mut old_index = 0;
78
79	{
80		loop {
81			if type_section(module).map(|section| section.types_mut().len()).unwrap_or(0) == index { break; }
82
83			if stay.contains(&Symbol::Type(old_index)) {
84				index += 1;
85			} else {
86				type_section(module)
87					.expect("If type section does not exists, the loop will break at the beginning of first iteration")
88					.types_mut().remove(index);
89				eliminated_types.push(old_index);
90				trace!("Eliminated type({})", old_index);
91			}
92			old_index += 1;
93		}
94	}
95
96	// Second, iterate through imports
97	let mut top_funcs = 0;
98	let mut top_globals = 0;
99	index = 0;
100	old_index = 0;
101
102	if let Some(imports) = import_section(module) {
103		loop {
104			let mut remove = false;
105			match imports.entries()[index].external() {
106				&elements::External::Function(_) => {
107					if stay.contains(&Symbol::Import(old_index)) {
108						index += 1;
109					} else {
110						remove = true;
111						eliminated_funcs.push(top_funcs);
112						trace!("Eliminated import({}) func({}, {})", old_index, top_funcs, imports.entries()[index].field());
113					}
114					top_funcs += 1;
115				},
116				&elements::External::Global(_) => {
117					if stay.contains(&Symbol::Import(old_index)) {
118						index += 1;
119					} else {
120						remove = true;
121						eliminated_globals.push(top_globals);
122						trace!("Eliminated import({}) global({}, {})", old_index, top_globals, imports.entries()[index].field());
123					}
124					top_globals += 1;
125				},
126				_ => {
127					index += 1;
128				}
129			}
130			if remove {
131				imports.entries_mut().remove(index);
132			}
133
134			old_index += 1;
135
136			if index == imports.entries().len() { break; }
137		}
138	}
139
140	// Third, iterate through globals
141	if let Some(globals) = global_section(module) {
142		index = 0;
143		old_index = 0;
144
145		loop {
146			if globals.entries_mut().len() == index { break; }
147			if stay.contains(&Symbol::Global(old_index)) {
148				index += 1;
149			} else {
150				globals.entries_mut().remove(index);
151				eliminated_globals.push(top_globals + old_index);
152				trace!("Eliminated global({})", top_globals + old_index);
153			}
154			old_index += 1;
155		}
156	}
157
158	// Forth, delete orphaned functions
159	if function_section(module).is_some() && code_section(module).is_some() {
160		index = 0;
161		old_index = 0;
162
163		loop {
164			if function_section(module).expect("Functons section to exist").entries_mut().len() == index { break; }
165			if stay.contains(&Symbol::Function(old_index)) {
166				index += 1;
167			} else {
168				function_section(module).expect("Functons section to exist").entries_mut().remove(index);
169				code_section(module).expect("Code section to exist").bodies_mut().remove(index);
170
171				eliminated_funcs.push(top_funcs + old_index);
172				trace!("Eliminated function({})", top_funcs + old_index);
173			}
174			old_index += 1;
175		}
176	}
177
178	// Fifth, eliminate unused exports
179	{
180		let exports = export_section(module).ok_or(Error::NoExportSection)?;
181
182		index = 0;
183		old_index = 0;
184
185		loop {
186			if exports.entries_mut().len() == index { break; }
187			if stay.contains(&Symbol::Export(old_index)) {
188				index += 1;
189			} else {
190				trace!("Eliminated export({}, {})", old_index, exports.entries_mut()[index].field());
191				exports.entries_mut().remove(index);
192			}
193			old_index += 1;
194		}
195	}
196
197	if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 || eliminated_types.len() > 0 {
198		// Finaly, rewire all calls, globals references and types to the new indices
199		//   (only if there is anything to do)
200		eliminated_globals.sort();
201		eliminated_funcs.sort();
202		eliminated_types.sort();
203
204		for section in module.sections_mut() {
205			match section {
206				&mut elements::Section::Start(ref mut func_index) if eliminated_funcs.len() > 0 => {
207					let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
208					*func_index -= totalle as u32;
209				},
210				&mut elements::Section::Function(ref mut function_section) if eliminated_types.len() > 0 => {
211					for ref mut func_signature in function_section.entries_mut() {
212						let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < func_signature.type_ref()).count();
213						*func_signature.type_ref_mut() -= totalle as u32;
214					}
215				},
216				&mut elements::Section::Import(ref mut import_section) if eliminated_types.len() > 0 => {
217					for ref mut import_entry in import_section.entries_mut() {
218						if let &mut elements::External::Function(ref mut type_ref) = import_entry.external_mut() {
219							let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < *type_ref).count();
220							*type_ref -= totalle as u32;
221						}
222					}
223				},
224				&mut elements::Section::Code(ref mut code_section) if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 => {
225					for ref mut func_body in code_section.bodies_mut() {
226						if eliminated_funcs.len() > 0 {
227							update_call_index(func_body.code_mut(), &eliminated_funcs);
228						}
229						if eliminated_globals.len() > 0 {
230							update_global_index(func_body.code_mut().elements_mut(), &eliminated_globals)
231						}
232						if eliminated_types.len() > 0 {
233							update_type_index(func_body.code_mut(), &eliminated_types)
234						}
235					}
236				},
237				&mut elements::Section::Export(ref mut export_section) => {
238					for ref mut export in export_section.entries_mut() {
239						match export.internal_mut() {
240							&mut elements::Internal::Function(ref mut func_index) => {
241								let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
242								*func_index -= totalle as u32;
243							},
244							&mut elements::Internal::Global(ref mut global_index) => {
245								let totalle = eliminated_globals.iter().take_while(|i| (**i as u32) < *global_index).count();
246								*global_index -= totalle as u32;
247							},
248							_ => {}
249						}
250					}
251				},
252				&mut elements::Section::Global(ref mut global_section) => {
253					for ref mut global_entry in global_section.entries_mut() {
254						update_global_index(global_entry.init_expr_mut().code_mut(), &eliminated_globals)
255					}
256				},
257				&mut elements::Section::Data(ref mut data_section) => {
258					for ref mut segment in data_section.entries_mut() {
259						update_global_index(segment.offset_mut().code_mut(), &eliminated_globals)
260					}
261				},
262				&mut elements::Section::Element(ref mut elements_section) => {
263					for ref mut segment in elements_section.entries_mut() {
264						update_global_index(segment.offset_mut().code_mut(), &eliminated_globals);
265						// update all indirect call addresses initial values
266						for func_index in segment.members_mut() {
267							let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
268							*func_index -= totalle as u32;
269						}
270					}
271				},
272				&mut elements::Section::Name(ref mut name_section) => {
273					match name_section {
274						&mut elements::NameSection::Function(ref mut func_name) => {
275							let mut func_name_map = mem::replace(func_name.names_mut(), elements::IndexMap::default());
276							for index in &eliminated_funcs {
277								func_name_map.remove(*index as u32);
278							}
279							let updated_map = func_name_map.into_iter().map(|(index, value)| {
280								let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < index).count() as u32;
281								(index - totalle, value)
282							}).collect();
283							mem::replace(func_name.names_mut(), updated_map);
284						}
285						&mut elements::NameSection::Local(ref mut local_name) => {
286							let mut local_names_map = mem::replace(local_name.local_names_mut(), elements::IndexMap::default());
287							for index in &eliminated_funcs {
288								local_names_map.remove(*index as u32);
289							}
290							let updated_map = local_names_map.into_iter().map(|(index, value)| {
291								let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < index).count() as u32;
292								(index - totalle, value)
293							}).collect();
294
295							mem::replace(local_name.local_names_mut(), updated_map);
296						}
297						_ => {}
298					}
299				}
300				_ => { }
301			}
302		}
303	}
304
305	Ok(())
306}
307
308
309pub fn update_call_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
310	use xwasm::elements::Instruction::*;
311	for instruction in instructions.elements_mut().iter_mut() {
312		if let &mut Call(ref mut call_index) = instruction {
313			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
314			trace!("rewired call {} -> call {}", *call_index, *call_index - totalle as u32);
315			*call_index -= totalle as u32;
316		}
317	}
318}
319
320/// Updates global references considering the _ordered_ list of eliminated indices
321pub fn update_global_index(instructions: &mut Vec<elements::Instruction>, eliminated_indices: &[usize]) {
322	use xwasm::elements::Instruction::*;
323	for instruction in instructions.iter_mut() {
324		match instruction {
325			&mut GetGlobal(ref mut index) | &mut SetGlobal(ref mut index) => {
326				let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *index).count();
327				trace!("rewired global {} -> global {}", *index, *index - totalle as u32);
328				*index -= totalle as u32;
329			},
330			_ => { },
331		}
332	}
333}
334
335/// Updates global references considering the _ordered_ list of eliminated indices
336pub fn update_type_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
337	use xwasm::elements::Instruction::*;
338	for instruction in instructions.elements_mut().iter_mut() {
339		if let &mut CallIndirect(ref mut call_index, _) = instruction {
340			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
341			trace!("rewired call_indrect {} -> call_indirect {}", *call_index, *call_index - totalle as u32);
342			*call_index -= totalle as u32;
343		}
344	}
345}
346
347pub fn import_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ImportSection> {
348   for section in module.sections_mut() {
349		if let &mut elements::Section::Import(ref mut sect) = section {
350			return Some(sect);
351		}
352	}
353	None
354}
355
356pub fn global_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::GlobalSection> {
357   for section in module.sections_mut() {
358		if let &mut elements::Section::Global(ref mut sect) = section {
359			return Some(sect);
360		}
361	}
362	None
363}
364
365pub fn function_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::FunctionSection> {
366   for section in module.sections_mut() {
367		if let &mut elements::Section::Function(ref mut sect) = section {
368			return Some(sect);
369		}
370	}
371	None
372}
373
374pub fn code_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::CodeSection> {
375   for section in module.sections_mut() {
376		if let &mut elements::Section::Code(ref mut sect) = section {
377			return Some(sect);
378		}
379	}
380	None
381}
382
383pub fn export_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ExportSection> {
384   for section in module.sections_mut() {
385		if let &mut elements::Section::Export(ref mut sect) = section {
386			return Some(sect);
387		}
388	}
389	None
390}
391
392pub fn type_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::TypeSection> {
393   for section in module.sections_mut() {
394		if let &mut elements::Section::Type(ref mut sect) = section {
395			return Some(sect);
396		}
397	}
398	None
399}
400
401#[cfg(test)]
402mod tests {
403
404	use xwasm::{builder, elements};
405	use super::*;
406
407	/// @spec 0
408	/// Optimizer presumes that export section exists and contains
409	/// all symbols passed as a second parameter. Since empty module
410	/// obviously contains no export section, optimizer should return
411	/// error on it.
412	#[test]
413	fn empty() {
414		let mut module = builder::module().build();
415		let result = optimize(&mut module, vec!["_call"]);
416
417		assert!(result.is_err());
418	}
419
420	/// @spec 1
421	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
422	/// and exports both of them in the export section. During optimization, the `_random`
423	/// function should vanish completely, given we pass `_call` as the only function to stay
424	/// in the module.
425	#[test]
426	fn minimal() {
427		let mut module = builder::module()
428			.function()
429				.signature().param().i32().build()
430				.build()
431			.function()
432				.signature()
433					.param().i32()
434					.param().i32()
435					.build()
436				.build()
437			.export()
438				.field("_call")
439				.internal().func(0).build()
440			.export()
441				.field("_random")
442				.internal().func(1).build()
443			.build();
444		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
445
446		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
447
448		assert_eq!(
449			1,
450			module.export_section().expect("export section to be generated").entries().len(),
451			"There should only 1 (one) export entry in the optimized module"
452		);
453
454		assert_eq!(
455			1,
456			module.function_section().expect("functions section to be generated").entries().len(),
457			"There should 2 (two) functions in the optimized module"
458		);
459	}
460
461	/// @spec 2
462	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
463	/// to stay during the optimization. The code of this function uses global during the execution.
464	/// This sayed global should survive the optimization.
465	#[test]
466	fn globals() {
467		let mut module = builder::module()
468			.global()
469				.value_type().i32()
470				.build()
471			.function()
472				.signature().param().i32().build()
473				.body()
474					.with_instructions(elements::Instructions::new(
475						vec![
476							elements::Instruction::GetGlobal(0),
477							elements::Instruction::End
478						]
479					))
480					.build()
481				.build()
482			.export()
483				.field("_call")
484				.internal().func(0).build()
485			.build();
486
487		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
488
489		assert_eq!(
490			1,
491			module.global_section().expect("global section to be generated").entries().len(),
492			"There should 1 (one) global entry in the optimized module, since _call function uses it"
493		);
494	}
495
496	/// @spec 2
497	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
498	/// to stay during the optimization. The code of this function uses one global during the execution,
499	/// but we have a bunch of other unused globals in the code. Last globals should not survive the optimization,
500	/// while the former should.
501	#[test]
502	fn globals_2() {
503		let mut module = builder::module()
504			.global()
505				.value_type().i32()
506				.build()
507			.global()
508				.value_type().i64()
509				.build()
510			.global()
511				.value_type().f32()
512				.build()
513			.function()
514				.signature().param().i32().build()
515				.body()
516					.with_instructions(elements::Instructions::new(
517						vec![
518							elements::Instruction::GetGlobal(1),
519							elements::Instruction::End
520						]
521					))
522					.build()
523				.build()
524			.export()
525				.field("_call")
526				.internal().func(0).build()
527			.build();
528
529		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
530
531		assert_eq!(
532			1,
533			module.global_section().expect("global section to be generated").entries().len(),
534			"There should 1 (one) global entry in the optimized module, since _call function uses only one"
535		);
536	}
537
538	/// @spec 3
539	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
540	/// and exports both of them in the export section. Function `_call` also calls `_random`
541	/// in its function body. The optimization should kick `_random` function from the export section
542	/// but preserve it's body.
543	#[test]
544	fn call_ref() {
545		let mut module = builder::module()
546			.function()
547				.signature().param().i32().build()
548				.body()
549					.with_instructions(elements::Instructions::new(
550						vec![
551							elements::Instruction::Call(1),
552							elements::Instruction::End
553						]
554					))
555					.build()
556				.build()
557			.function()
558				.signature()
559					.param().i32()
560					.param().i32()
561					.build()
562				.build()
563			.export()
564				.field("_call")
565				.internal().func(0).build()
566			.export()
567				.field("_random")
568				.internal().func(1).build()
569			.build();
570		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
571
572		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
573
574		assert_eq!(
575			1,
576			module.export_section().expect("export section to be generated").entries().len(),
577			"There should only 1 (one) export entry in the optimized module"
578		);
579
580		assert_eq!(
581			2,
582			module.function_section().expect("functions section to be generated").entries().len(),
583			"There should 2 (two) functions in the optimized module"
584		);
585	}
586
587	/// @spec 4
588	/// Imagine the unoptimized module has an indirect call to function of type 1
589	/// The type should persist so that indirect call would work
590	#[test]
591	fn call_indirect() {
592		let mut module = builder::module()
593			.function()
594				.signature().param().i32().param().i32().build()
595				.build()
596			.function()
597				.signature().param().i32().param().i32().param().i32().build()
598				.build()
599			.function()
600				.signature().param().i32().build()
601				.body()
602					.with_instructions(elements::Instructions::new(
603						vec![
604							elements::Instruction::CallIndirect(1, 0),
605							elements::Instruction::End
606						]
607					))
608					.build()
609				.build()
610			.export()
611				.field("_call")
612				.internal().func(2).build()
613			.build();
614
615		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
616
617		assert_eq!(
618			2,
619			module.type_section().expect("type section to be generated").types().len(),
620			"There should 2 (two) types left in the module, 1 for indirect call and one for _call"
621		);
622
623		let indirect_opcode = &module.code_section().expect("code section to be generated").bodies()[0].code().elements()[0];
624		match *indirect_opcode {
625			elements::Instruction::CallIndirect(0, 0) => {},
626			_ => {
627				panic!(
628					"Expected call_indirect to use index 0 after optimization, since previois 0th was eliminated, but got {:?}",
629					indirect_opcode
630				);
631			}
632		}
633	}
634
635}