swasm_utils/
optimizer.rs

1#[cfg(features = "std")]
2use std::collections::{HashSet as Set};
3#[cfg(not(features = "std"))]
4use std::collections::{BTreeSet as Set};
5use std::vec::Vec;
6
7use swasm::elements;
8
9use symbols::{Symbol, expand_symbols, push_code_symbols, resolve_function};
10
11#[derive(Debug)]
12pub enum Error {
13	/// Since optimizer starts with export entries, export
14	///   section is supposed to exist.
15	NoExportSection,
16}
17
18pub fn optimize(
19	module: &mut elements::Module, // Module to optimize
20	used_exports: Vec<&str>,       // List of only exports that will be usable after optimization
21) -> Result<(), Error> {
22	// WebAssembly exports optimizer
23	// Motivation: emscripten compiler backend compiles in many unused exports
24	//   which in turn compile in unused imports and leaves unused functions
25
26	// Algo starts from the top, listing all items that should stay
27	let mut stay = Set::new();
28	for (index, entry) in module.export_section().ok_or(Error::NoExportSection)?.entries().iter().enumerate() {
29		if used_exports.iter().find(|e| **e == entry.field()).is_some() {
30			stay.insert(Symbol::Export(index));
31		}
32	}
33
34	// If there is start function in module, it should stary
35	module.start_section().map(|ss| stay.insert(resolve_function(&module, ss)));
36
37	// All symbols used in data/element segments are also should be preserved
38	let mut init_symbols = Vec::new();
39	if let Some(data_section) = module.data_section() {
40		for segment in data_section.entries() {
41			push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
42		}
43	}
44	if let Some(elements_section) = module.elements_section() {
45		for segment in elements_section.entries() {
46			push_code_symbols(&module, segment.offset().code(), &mut init_symbols);
47			for func_index in segment.members() {
48				stay.insert(resolve_function(&module, *func_index));
49			}
50		}
51	}
52	for symbol in init_symbols.drain(..) { stay.insert(symbol); }
53
54	// Call function which will traverse the list recursively, filling stay with all symbols
55	// that are already used by those which already there
56	expand_symbols(module, &mut stay);
57
58	for symbol in stay.iter() {
59		trace!("symbol to stay: {:?}", symbol);
60	}
61
62	// Keep track of referreable symbols to rewire calls/globals
63	let mut eliminated_funcs = Vec::new();
64	let mut eliminated_globals = Vec::new();
65	let mut eliminated_types = Vec::new();
66
67	// First, iterate through types
68	let mut index = 0;
69	let mut old_index = 0;
70
71	{
72		loop {
73			if type_section(module).map(|section| section.types_mut().len()).unwrap_or(0) == index { break; }
74
75			if stay.contains(&Symbol::Type(old_index)) {
76				index += 1;
77			} else {
78				type_section(module)
79					.expect("If type section does not exists, the loop will break at the beginning of first iteration")
80					.types_mut().remove(index);
81				eliminated_types.push(old_index);
82				trace!("Eliminated type({})", old_index);
83			}
84			old_index += 1;
85		}
86	}
87
88	// Second, iterate through imports
89	let mut top_funcs = 0;
90	let mut top_globals = 0;
91	index = 0;
92	old_index = 0;
93
94	if let Some(imports) = import_section(module) {
95		loop {
96			let mut remove = false;
97			match imports.entries()[index].external() {
98				&elements::External::Function(_) => {
99					if stay.contains(&Symbol::Import(old_index)) {
100						index += 1;
101					} else {
102						remove = true;
103						eliminated_funcs.push(top_funcs);
104						trace!("Eliminated import({}) func({}, {})", old_index, top_funcs, imports.entries()[index].field());
105					}
106					top_funcs += 1;
107				},
108				&elements::External::Global(_) => {
109					if stay.contains(&Symbol::Import(old_index)) {
110						index += 1;
111					} else {
112						remove = true;
113						eliminated_globals.push(top_globals);
114						trace!("Eliminated import({}) global({}, {})", old_index, top_globals, imports.entries()[index].field());
115					}
116					top_globals += 1;
117				},
118				_ => {
119					index += 1;
120				}
121			}
122			if remove {
123				imports.entries_mut().remove(index);
124			}
125
126			old_index += 1;
127
128			if index == imports.entries().len() { break; }
129		}
130	}
131
132	// Third, iterate through globals
133	if let Some(globals) = global_section(module) {
134		index = 0;
135		old_index = 0;
136
137		loop {
138			if globals.entries_mut().len() == index { break; }
139			if stay.contains(&Symbol::Global(old_index)) {
140				index += 1;
141			} else {
142				globals.entries_mut().remove(index);
143				eliminated_globals.push(top_globals + old_index);
144				trace!("Eliminated global({})", top_globals + old_index);
145			}
146			old_index += 1;
147		}
148	}
149
150	// Forth, delete orphaned functions
151	if function_section(module).is_some() && code_section(module).is_some() {
152		index = 0;
153		old_index = 0;
154
155		loop {
156			if function_section(module).expect("Functons section to exist").entries_mut().len() == index { break; }
157			if stay.contains(&Symbol::Function(old_index)) {
158				index += 1;
159			} else {
160				function_section(module).expect("Functons section to exist").entries_mut().remove(index);
161				code_section(module).expect("Code section to exist").bodies_mut().remove(index);
162
163				eliminated_funcs.push(top_funcs + old_index);
164				trace!("Eliminated function({})", top_funcs + old_index);
165			}
166			old_index += 1;
167		}
168	}
169
170	// Fifth, eliminate unused exports
171	{
172		let exports = export_section(module).ok_or(Error::NoExportSection)?;
173
174		index = 0;
175		old_index = 0;
176
177		loop {
178			if exports.entries_mut().len() == index { break; }
179			if stay.contains(&Symbol::Export(old_index)) {
180				index += 1;
181			} else {
182				trace!("Eliminated export({}, {})", old_index, exports.entries_mut()[index].field());
183				exports.entries_mut().remove(index);
184			}
185			old_index += 1;
186		}
187	}
188
189	if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 || eliminated_types.len() > 0 {
190		// Finaly, rewire all calls, globals references and types to the new indices
191		//   (only if there is anything to do)
192		eliminated_globals.sort();
193		eliminated_funcs.sort();
194		eliminated_types.sort();
195
196		for section in module.sections_mut() {
197			match section {
198				&mut elements::Section::Start(ref mut func_index) if eliminated_funcs.len() > 0 => {
199					let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
200					*func_index -= totalle as u32;
201				},
202				&mut elements::Section::Function(ref mut function_section) if eliminated_types.len() > 0 => {
203					for ref mut func_signature in function_section.entries_mut() {
204						let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < func_signature.type_ref()).count();
205						*func_signature.type_ref_mut() -= totalle as u32;
206					}
207				},
208				&mut elements::Section::Import(ref mut import_section) if eliminated_types.len() > 0 => {
209					for ref mut import_entry in import_section.entries_mut() {
210						if let &mut elements::External::Function(ref mut type_ref) = import_entry.external_mut() {
211							let totalle = eliminated_types.iter().take_while(|i| (**i as u32) < *type_ref).count();
212							*type_ref -= totalle as u32;
213						}
214					}
215				},
216				&mut elements::Section::Code(ref mut code_section) if eliminated_globals.len() > 0 || eliminated_funcs.len() > 0 => {
217					for ref mut func_body in code_section.bodies_mut() {
218						if eliminated_funcs.len() > 0 {
219							update_call_index(func_body.code_mut(), &eliminated_funcs);
220						}
221						if eliminated_globals.len() > 0 {
222							update_global_index(func_body.code_mut().elements_mut(), &eliminated_globals)
223						}
224						if eliminated_types.len() > 0 {
225							update_type_index(func_body.code_mut(), &eliminated_types)
226						}
227					}
228				},
229				&mut elements::Section::Export(ref mut export_section) => {
230					for ref mut export in export_section.entries_mut() {
231						match export.internal_mut() {
232							&mut elements::Internal::Function(ref mut func_index) => {
233								let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
234								*func_index -= totalle as u32;
235							},
236							&mut elements::Internal::Global(ref mut global_index) => {
237								let totalle = eliminated_globals.iter().take_while(|i| (**i as u32) < *global_index).count();
238								*global_index -= totalle as u32;
239							},
240							_ => {}
241						}
242					}
243				},
244				&mut elements::Section::Global(ref mut global_section) => {
245					for ref mut global_entry in global_section.entries_mut() {
246						update_global_index(global_entry.init_expr_mut().code_mut(), &eliminated_globals)
247					}
248				},
249				&mut elements::Section::Data(ref mut data_section) => {
250					for ref mut segment in data_section.entries_mut() {
251						update_global_index(segment.offset_mut().code_mut(), &eliminated_globals)
252					}
253				},
254				&mut elements::Section::Element(ref mut elements_section) => {
255					for ref mut segment in elements_section.entries_mut() {
256						update_global_index(segment.offset_mut().code_mut(), &eliminated_globals);
257						// update all indirect call addresses initial values
258						for func_index in segment.members_mut() {
259							let totalle = eliminated_funcs.iter().take_while(|i| (**i as u32) < *func_index).count();
260							*func_index -= totalle as u32;
261						}
262					}
263				},
264				_ => { }
265			}
266		}
267	}
268
269	Ok(())
270}
271
272
273pub fn update_call_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
274	use swasm::elements::Instruction::*;
275	for instruction in instructions.elements_mut().iter_mut() {
276		if let &mut Call(ref mut call_index) = instruction {
277			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
278			trace!("rewired call {} -> call {}", *call_index, *call_index - totalle as u32);
279			*call_index -= totalle as u32;
280		}
281	}
282}
283
284/// Updates global references considering the _ordered_ list of eliminated indices
285pub fn update_global_index(instructions: &mut Vec<elements::Instruction>, eliminated_indices: &[usize]) {
286	use swasm::elements::Instruction::*;
287	for instruction in instructions.iter_mut() {
288		match instruction {
289			&mut GetGlobal(ref mut index) | &mut SetGlobal(ref mut index) => {
290				let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *index).count();
291				trace!("rewired global {} -> global {}", *index, *index - totalle as u32);
292				*index -= totalle as u32;
293			},
294			_ => { },
295		}
296	}
297}
298
299/// Updates global references considering the _ordered_ list of eliminated indices
300pub fn update_type_index(instructions: &mut elements::Instructions, eliminated_indices: &[usize]) {
301	use swasm::elements::Instruction::*;
302	for instruction in instructions.elements_mut().iter_mut() {
303		if let &mut CallIndirect(ref mut call_index, _) = instruction {
304			let totalle = eliminated_indices.iter().take_while(|i| (**i as u32) < *call_index).count();
305			trace!("rewired call_indrect {} -> call_indirect {}", *call_index, *call_index - totalle as u32);
306			*call_index -= totalle as u32;
307		}
308	}
309}
310
311pub fn import_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ImportSection> {
312   for section in module.sections_mut() {
313		if let &mut elements::Section::Import(ref mut sect) = section {
314			return Some(sect);
315		}
316	}
317	None
318}
319
320pub fn global_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::GlobalSection> {
321   for section in module.sections_mut() {
322		if let &mut elements::Section::Global(ref mut sect) = section {
323			return Some(sect);
324		}
325	}
326	None
327}
328
329pub fn function_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::FunctionSection> {
330   for section in module.sections_mut() {
331		if let &mut elements::Section::Function(ref mut sect) = section {
332			return Some(sect);
333		}
334	}
335	None
336}
337
338pub fn code_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::CodeSection> {
339   for section in module.sections_mut() {
340		if let &mut elements::Section::Code(ref mut sect) = section {
341			return Some(sect);
342		}
343	}
344	None
345}
346
347pub fn export_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::ExportSection> {
348   for section in module.sections_mut() {
349		if let &mut elements::Section::Export(ref mut sect) = section {
350			return Some(sect);
351		}
352	}
353	None
354}
355
356pub fn type_section<'a>(module: &'a mut elements::Module) -> Option<&'a mut elements::TypeSection> {
357   for section in module.sections_mut() {
358		if let &mut elements::Section::Type(ref mut sect) = section {
359			return Some(sect);
360		}
361	}
362	None
363}
364
365#[cfg(test)]
366mod tests {
367
368	use swasm::{builder, elements};
369	use super::*;
370
371	/// @spec 0
372	/// Optimizer presumes that export section exists and contains
373	/// all symbols passed as a second parameter. Since empty module
374	/// obviously contains no export section, optimizer should return
375	/// error on it.
376	#[test]
377	fn empty() {
378		let mut module = builder::module().build();
379		let result = optimize(&mut module, vec!["_call"]);
380
381		assert!(result.is_err());
382	}
383
384	/// @spec 1
385	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
386	/// and exports both of them in the export section. During optimization, the `_random`
387	/// function should vanish completely, given we pass `_call` as the only function to stay
388	/// in the module.
389	#[test]
390	fn minimal() {
391		let mut module = builder::module()
392			.function()
393				.signature().param().i32().build()
394				.build()
395			.function()
396				.signature()
397					.param().i32()
398					.param().i32()
399					.build()
400				.build()
401			.export()
402				.field("_call")
403				.internal().func(0).build()
404			.export()
405				.field("_random")
406				.internal().func(1).build()
407			.build();
408		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
409
410		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
411
412		assert_eq!(
413			1,
414			module.export_section().expect("export section to be generated").entries().len(),
415			"There should only 1 (one) export entry in the optimized module"
416		);
417
418		assert_eq!(
419			1,
420			module.function_section().expect("functions section to be generated").entries().len(),
421			"There should 2 (two) functions in the optimized module"
422		);
423	}
424
425	/// @spec 2
426	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
427	/// to stay during the optimization. The code of this function uses global during the execution.
428	/// This sayed global should survive the optimization.
429	#[test]
430	fn globals() {
431		let mut module = builder::module()
432			.global()
433				.value_type().i32()
434				.build()
435			.function()
436				.signature().param().i32().build()
437				.body()
438					.with_instructions(elements::Instructions::new(
439						vec![
440							elements::Instruction::GetGlobal(0),
441							elements::Instruction::End
442						]
443					))
444					.build()
445				.build()
446			.export()
447				.field("_call")
448				.internal().func(0).build()
449			.build();
450
451		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
452
453		assert_eq!(
454			1,
455			module.global_section().expect("global section to be generated").entries().len(),
456			"There should 1 (one) global entry in the optimized module, since _call function uses it"
457		);
458	}
459
460	/// @spec 2
461	/// Imagine there is one exported function in unoptimized module, `_call`, that we specify as the one
462	/// to stay during the optimization. The code of this function uses one global during the execution,
463	/// but we have a bunch of other unused globals in the code. Last globals should not survive the optimization,
464	/// while the former should.
465	#[test]
466	fn globals_2() {
467		let mut module = builder::module()
468			.global()
469				.value_type().i32()
470				.build()
471			.global()
472				.value_type().i64()
473				.build()
474			.global()
475				.value_type().f32()
476				.build()
477			.function()
478				.signature().param().i32().build()
479				.body()
480					.with_instructions(elements::Instructions::new(
481						vec![
482							elements::Instruction::GetGlobal(1),
483							elements::Instruction::End
484						]
485					))
486					.build()
487				.build()
488			.export()
489				.field("_call")
490				.internal().func(0).build()
491			.build();
492
493		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
494
495		assert_eq!(
496			1,
497			module.global_section().expect("global section to be generated").entries().len(),
498			"There should 1 (one) global entry in the optimized module, since _call function uses only one"
499		);
500	}
501
502	/// @spec 3
503	/// Imagine the unoptimized module has two own functions, `_call` and `_random`
504	/// and exports both of them in the export section. Function `_call` also calls `_random`
505	/// in its function body. The optimization should kick `_random` function from the export section
506	/// but preserve it's body.
507	#[test]
508	fn call_ref() {
509		let mut module = builder::module()
510			.function()
511				.signature().param().i32().build()
512				.body()
513					.with_instructions(elements::Instructions::new(
514						vec![
515							elements::Instruction::Call(1),
516							elements::Instruction::End
517						]
518					))
519					.build()
520				.build()
521			.function()
522				.signature()
523					.param().i32()
524					.param().i32()
525					.build()
526				.build()
527			.export()
528				.field("_call")
529				.internal().func(0).build()
530			.export()
531				.field("_random")
532				.internal().func(1).build()
533			.build();
534		assert_eq!(module.export_section().expect("export section to be generated").entries().len(), 2);
535
536		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
537
538		assert_eq!(
539			1,
540			module.export_section().expect("export section to be generated").entries().len(),
541			"There should only 1 (one) export entry in the optimized module"
542		);
543
544		assert_eq!(
545			2,
546			module.function_section().expect("functions section to be generated").entries().len(),
547			"There should 2 (two) functions in the optimized module"
548		);
549	}
550
551	/// @spec 4
552	/// Imagine the unoptimized module has an indirect call to function of type 1
553	/// The type should persist so that indirect call would work
554	#[test]
555	fn call_indirect() {
556		let mut module = builder::module()
557			.function()
558				.signature().param().i32().param().i32().build()
559				.build()
560			.function()
561				.signature().param().i32().param().i32().param().i32().build()
562				.build()
563			.function()
564				.signature().param().i32().build()
565				.body()
566					.with_instructions(elements::Instructions::new(
567						vec![
568							elements::Instruction::CallIndirect(1, 0),
569							elements::Instruction::End
570						]
571					))
572					.build()
573				.build()
574			.export()
575				.field("_call")
576				.internal().func(2).build()
577			.build();
578
579		optimize(&mut module, vec!["_call"]).expect("optimizer to succeed");
580
581		assert_eq!(
582			2,
583			module.type_section().expect("type section to be generated").types().len(),
584			"There should 2 (two) types left in the module, 1 for indirect call and one for _call"
585		);
586
587		let indirect_opcode = &module.code_section().expect("code section to be generated").bodies()[0].code().elements()[0];
588		match *indirect_opcode {
589			elements::Instruction::CallIndirect(0, 0) => {},
590			_ => {
591				panic!(
592					"Expected call_indirect to use index 0 after optimization, since previois 0th was eliminated, but got {:?}",
593					indirect_opcode
594				);
595			}
596		}
597	}
598
599}