wasm_used/
lib.rs

1use std::collections::HashSet;
2
3use id_arena::Id;
4use walrus::{ir::*, ExportId};
5use walrus::{ConstExpr, Data, DataId, DataKind, Element, ExportItem, Function};
6use walrus::{ElementId, ElementItems, ElementKind, Module, RefType, Type, TypeId};
7use walrus::{FunctionId, FunctionKind, Global, GlobalId};
8use walrus::{GlobalKind, Memory, MemoryId, Table, TableId};
9
10type IdHashSet<T> = HashSet<Id<T>>;
11
12/// Set of all root used items in a wasm module.
13#[derive(Debug, Default)]
14pub struct Roots {
15    tables: Vec<TableId>,
16    funcs: Vec<(FunctionId, Location)>,
17    globals: Vec<GlobalId>,
18    memories: Vec<MemoryId>,
19    data: Vec<DataId>,
20    elements: Vec<ElementId>,
21    used: Used,
22}
23
24#[allow(dead_code)]
25#[derive(Debug)]
26pub enum Location {
27    Start,
28    Export { export: ExportId },
29    Table { table: TableId },
30    Memory { memory: MemoryId },
31    Global { global: GlobalId },
32    Data,
33    Element { element: ElementId },
34    Code { func: FunctionId },
35}
36
37impl Roots {
38    /// Creates a new set of empty roots.
39    pub fn new() -> Roots {
40        Roots::default()
41    }
42
43    /// Adds a new function to the set of roots
44    pub fn push_func(&mut self, func: FunctionId, from: Location) -> &mut Roots {
45        if self.used.funcs.insert(func) {
46            // log::trace!("function is used: {:?}", func);
47            self.funcs.push((func, from));
48        }
49        self
50    }
51
52    /// Adds a new table to the set of roots
53    pub fn push_table(&mut self, table: TableId) -> &mut Roots {
54        if self.used.tables.insert(table) {
55            // log::trace!("table is used: {:?}", table);
56            self.tables.push(table);
57        }
58        self
59    }
60
61    /// Adds a new memory to the set of roots
62    pub fn push_memory(&mut self, memory: MemoryId) -> &mut Roots {
63        if self.used.memories.insert(memory) {
64            // log::trace!("memory is used: {:?}", memory);
65            self.memories.push(memory);
66        }
67        self
68    }
69
70    /// Adds a new global to the set of roots
71    pub fn push_global(&mut self, global: GlobalId) -> &mut Roots {
72        if self.used.globals.insert(global) {
73            // log::trace!("global is used: {:?}", global);
74            self.globals.push(global);
75        }
76        self
77    }
78
79    fn push_data(&mut self, data: DataId) -> &mut Roots {
80        if self.used.data.insert(data) {
81            // log::trace!("data is used: {:?}", data);
82            self.data.push(data);
83        }
84        self
85    }
86
87    fn push_element(&mut self, element: ElementId) -> &mut Roots {
88        if self.used.elements.insert(element) {
89            // log::trace!("element is used: {:?}", element);
90            self.elements.push(element);
91        }
92        self
93    }
94}
95
96/// Finds the things within a module that are used.
97///
98/// This is useful for implementing something like a linker's `--gc-sections` so
99/// that our emitted `.wasm` binaries are small and don't contain things that
100/// are not used.
101#[derive(Debug, Default)]
102pub struct Used {
103    /// The module's used tables.
104    pub tables: IdHashSet<Table>,
105    /// The module's used types.
106    pub types: IdHashSet<Type>,
107    /// The module's used functions.
108    pub funcs: IdHashSet<Function>,
109    /// The module's used globals.
110    pub globals: IdHashSet<Global>,
111    /// The module's used memories.
112    pub memories: IdHashSet<Memory>,
113    /// The module's used passive element segments.
114    pub elements: IdHashSet<Element>,
115    /// The module's used passive data segments.
116    pub data: IdHashSet<Data>,
117}
118
119impl Used {
120    /// Construct a new `Used` set for the given module.
121    pub fn new(module: &Module, deleted: &HashSet<FunctionId>) -> Used {
122        // log::debug!("starting to calculate used set");
123        let mut stack = Roots::default();
124
125        // All exports are roots
126        for export in module.exports.iter() {
127            match export.item {
128                ExportItem::Function(f) => stack.push_func(
129                    f,
130                    Location::Export {
131                        export: export.id(),
132                    },
133                ),
134                ExportItem::Table(t) => stack.push_table(t),
135                ExportItem::Memory(m) => stack.push_memory(m),
136                ExportItem::Global(g) => stack.push_global(g),
137            };
138        }
139
140        // The start function is an implicit root as well
141        if let Some(f) = module.start {
142            stack.push_func(f, Location::Start);
143        }
144
145        // Initialization of memories or tables is a side-effectful operation
146        // because they can be out-of-bounds, so keep all active segments.
147        for data in module.data.iter() {
148            if let DataKind::Active { .. } = &data.kind {
149                stack.push_data(data.id());
150            }
151        }
152        for elem in module.elements.iter() {
153            match elem.kind {
154                // Active segments are rooted because they initialize imported
155                // tables.
156                ElementKind::Active { table, .. } => {
157                    if module.tables.get(table).import.is_some() {
158                        stack.push_element(elem.id());
159                    }
160                }
161                // Declared segments can probably get gc'd but for now we're
162                // conservative and we root them
163                ElementKind::Declared => {
164                    stack.push_element(elem.id());
165                }
166                ElementKind::Passive => {}
167            }
168        }
169
170        // // And finally ask custom sections for their roots
171        // for (_id, section) in module.customs.iter() {
172        //     section.add_gc_roots(&mut stack);
173        // }
174        // tracing::info!("Used roots: {:#?}", stack);
175
176        // Iteratively visit all items until our stack is empty
177        while !stack.funcs.is_empty()
178            || !stack.tables.is_empty()
179            || !stack.memories.is_empty()
180            || !stack.globals.is_empty()
181            || !stack.data.is_empty()
182            || !stack.elements.is_empty()
183        {
184            while let Some((f, _loc)) = stack.funcs.pop() {
185                if deleted.contains(&f) {
186                    let func = module.funcs.get(f);
187                    let name = func
188                        .name
189                        .as_ref()
190                        .cloned()
191                        .unwrap_or_else(|| format!("unknown - {}", f.index()));
192                    // panic!(
193                    //     "Found a function that should be deleted but is still used: {:?} - {:?}",
194                    //     name, f
195                    // );
196                    tracing::error!(
197                        "Found a function that should be deleted but is still used: {:?} - {:?} - {:?}",
198                        name,
199                        f,
200                        _loc
201                    );
202                    if let Location::Code { func } = _loc {
203                        let func_name = module.funcs.get(func).name.as_ref().unwrap();
204                        tracing::error!("Function {:?} is used by {:?}", f, func_name);
205                    }
206
207                    // continue;
208                }
209
210                let func = module.funcs.get(f);
211                stack.used.types.insert(func.ty());
212
213                match &func.kind {
214                    FunctionKind::Local(func) => {
215                        let mut visitor = UsedVisitor {
216                            cur_func: f,
217                            stack: &mut stack,
218                        };
219                        dfs_in_order(&mut visitor, func, func.entry_block());
220                    }
221                    FunctionKind::Import(_) => {}
222                    FunctionKind::Uninitialized(_) => unreachable!(),
223                }
224            }
225
226            while let Some(t) = stack.tables.pop() {
227                for elem in module.tables.get(t).elem_segments.iter() {
228                    stack.push_element(*elem);
229                }
230            }
231
232            while let Some(t) = stack.globals.pop() {
233                match &module.globals.get(t).kind {
234                    GlobalKind::Import(_) => {}
235                    GlobalKind::Local(ConstExpr::Global(global)) => {
236                        stack.push_global(*global);
237                    }
238                    GlobalKind::Local(ConstExpr::RefFunc(func)) => {
239                        stack.push_func(*func, Location::Global { global: t });
240                    }
241                    GlobalKind::Local(ConstExpr::Value(_))
242                    | GlobalKind::Local(ConstExpr::RefNull(_)) => {}
243                }
244            }
245
246            while let Some(t) = stack.memories.pop() {
247                for data in &module.memories.get(t).data_segments {
248                    stack.push_data(*data);
249                }
250            }
251
252            while let Some(d) = stack.data.pop() {
253                let d = module.data.get(d);
254                if let DataKind::Active { memory, offset } = &d.kind {
255                    stack.push_memory(*memory);
256                    if let ConstExpr::Global(g) = offset {
257                        stack.push_global(*g);
258                    }
259                }
260            }
261
262            while let Some(e) = stack.elements.pop() {
263                let e = module.elements.get(e);
264                if let ElementItems::Functions(function_ids) = &e.items {
265                    function_ids.iter().for_each(|f| {
266                        stack.push_func(*f, Location::Element { element: e.id() });
267                    });
268                }
269                if let ElementItems::Expressions(RefType::Funcref, items) = &e.items {
270                    for item in items {
271                        match item {
272                            ConstExpr::Global(g) => {
273                                stack.push_global(*g);
274                            }
275                            ConstExpr::RefFunc(f) => {
276                                stack.push_func(*f, Location::Element { element: e.id() });
277                            }
278                            _ => {}
279                        }
280                    }
281                }
282                if let ElementKind::Active { offset, table } = &e.kind {
283                    if let ConstExpr::Global(g) = offset {
284                        stack.push_global(*g);
285                    }
286                    stack.push_table(*table);
287                }
288            }
289        }
290
291        // Wabt seems to have weird behavior where a `data` segment, if present
292        // even if passive, requires a `memory` declaration. Our GC pass is
293        // pretty aggressive and if you have a passive data segment and only
294        // `data.drop` instructions you technically don't need the `memory`.
295        // Let's keep `wabt` passing though and just say that if there are data
296        // segments kept, but no memories, then we try to add the first memory,
297        // if any, to the used set.
298        if !stack.used.data.is_empty() && stack.used.memories.is_empty() {
299            if let Some(mem) = module.memories.iter().next() {
300                stack.used.memories.insert(mem.id());
301            }
302        }
303
304        stack.used
305    }
306}
307
308struct UsedVisitor<'a> {
309    cur_func: FunctionId,
310    stack: &'a mut Roots,
311}
312
313impl Visitor<'_> for UsedVisitor<'_> {
314    fn visit_function_id(&mut self, &func: &FunctionId) {
315        self.stack.push_func(
316            func,
317            Location::Code {
318                func: self.cur_func,
319            },
320        );
321    }
322
323    fn visit_memory_id(&mut self, &m: &MemoryId) {
324        self.stack.push_memory(m);
325    }
326
327    fn visit_global_id(&mut self, &g: &GlobalId) {
328        self.stack.push_global(g);
329    }
330
331    fn visit_table_id(&mut self, &t: &TableId) {
332        self.stack.push_table(t);
333    }
334
335    fn visit_type_id(&mut self, &t: &TypeId) {
336        self.stack.used.types.insert(t);
337    }
338
339    fn visit_data_id(&mut self, &d: &DataId) {
340        self.stack.push_data(d);
341    }
342
343    fn visit_element_id(&mut self, &e: &ElementId) {
344        self.stack.push_element(e);
345    }
346}