portal_pc_waffle/ir/
module.rs

1use alloc::collections::BTreeMap;
2use alloc::string::String;
3use core::default;
4use core::iter::{empty, once};
5use impl_trait_for_tuples::impl_for_tuples;
6
7use super::{
8    ControlTag, Func, FuncDecl, Global, HeapType, Memory, ModuleDisplay, Signature, StorageType,
9    Table, Type, WithMutablility,
10};
11use crate::entity::{EntityRef, EntityVec};
12use crate::ir::{Debug, DebugMap, FunctionBody};
13use crate::{backend, frontend};
14use alloc::vec;
15use alloc::vec::Vec;
16use anyhow::Result;
17use either::Either;
18use indexmap::IndexMap;
19
20pub use crate::frontend::FrontendOptions;
21
/// A WebAssembly module held as waffle IR.
///
/// The `'a` lifetime is the lifetime of the original Wasm byte slice
/// (see `orig_bytes`), which lazily-parsed function bodies may refer
/// into.
#[derive(Clone, Debug)]
pub struct Module<'a> {
    /// The original Wasm module this module was parsed from, if
    /// any. Used only for "lazy function bodies", which retain a
    /// range that can refer into this slice.
    pub orig_bytes: Option<&'a [u8]>,
    /// The functions in this module: imports, un-expanded ("lazily
    /// parsed") functions, functions as IR, or IR compiled into new
    /// bytecode.
    pub funcs: EntityVec<Func, FuncDecl<'a>>,
    /// Type signatures, referred to by `funcs`, `imports` and
    /// `exports`.
    pub signatures: EntityVec<Signature, SignatureData>,
    /// Global variables in this module.
    pub globals: EntityVec<Global, GlobalData>,
    /// Tables in this module.
    pub tables: EntityVec<Table, TableData>,
    /// Imports into this module. Function imports must also have an
    /// entry at the appropriate function index in `funcs`.
    pub imports: Vec<Import>,
    /// Exports from this module.
    pub exports: Vec<Export>,
    /// Memories/heaps that this module contains.
    pub memories: EntityVec<Memory, MemoryData>,
    /// Control tags that this module contains.
    pub control_tags: EntityVec<ControlTag, ControlTagData>,
    /// The "start function" invoked at instantiation, if any.
    pub start_func: Option<Func>,
    /// Debug-info associated with function bodies: interning pools
    /// for source file names and locations in those files.
    pub debug: Debug,
    /// Maps from original Wasm bytecode offsets to source locations.
    pub debug_map: DebugMap,
    /// Custom sections stored as raw bytes.
    /// NOTE(review): the `String` key is presumably the section
    /// name -- confirm against the frontend/backend.
    pub custom_sections: BTreeMap<String, Vec<u8>>,
}
/// The data stored for one control tag of a module (see
/// `Module::control_tags`).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ControlTagData {
    /// The signature used when invoking this tag.
    pub sig: Signature,
}
/// The definition stored for one entry of `Module::signatures`: a
/// function type, a struct type, an array type, an imported type, or
/// the empty `None` placeholder (which is the `Default` value).
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum SignatureData {
    /// A function type.
    Func {
        /// Parameters: a Wasm function may have zero or more primitive
        /// types as parameters.
        params: Vec<Type>,
        /// Returns: a Wasm function (when using the multivalue extension,
        /// which we assume to be present) may have zero or more primitive
        /// types as return values.
        returns: Vec<Type>,
        /// Whether this signature is shared.
        shared: bool,
    },
    /// A struct type.
    Struct {
        /// The fields of the struct, each with its mutability.
        fields: Vec<WithMutablility<StorageType>>,
        /// Whether this signature is shared.
        shared: bool,
    },
    /// An array type.
    Array {
        /// The element type, with its mutability.
        ty: WithMutablility<StorageType>,
        /// Whether this signature is shared.
        shared: bool,
    },
    /// NOTE(review): presumably a type that is imported rather than
    /// defined in this module (compare `ImportKind::Type`) -- confirm.
    Import {
        /// NOTE(review): presumably the heap type this imported type
        /// is treated like -- confirm.
        like: HeapType,
        /// Whether this signature is shared.
        shared: bool,
    },
    /// Placeholder with no contents; this is the `Default` variant.
    #[default]
    None,
}
95
96impl SignatureData {
97    pub fn shared(&self) -> bool {
98        match self {
99            SignatureData::Func {
100                params,
101                returns,
102                shared,
103            } => *shared,
104            SignatureData::Struct { fields, shared } => *shared,
105            SignatureData::Array { ty, shared } => *shared,
106            SignatureData::Import { like, shared } => *shared,
107            SignatureData::None => todo!(),
108        }
109    }
110}
111
/// Size in bytes of one WebAssembly page (64 KiB); memory definitions
/// count their sizes in pages.
pub const WASM_PAGE: usize = 64 * 1024;
114
/// A memory definition.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MemoryData {
    /// How many Wasm pages (64KiB size) in the initial memory size?
    pub initial_pages: usize,
    /// How many Wasm pages (64KiB size) in the maximum memory size?
    /// `None` when no maximum is recorded.
    pub maximum_pages: Option<usize>,
    /// Initialization data (initial image) for this memory.
    pub segments: Vec<MemorySegment>,
    /// NOTE(review): presumably set when this memory uses 64-bit
    /// addressing (memory64 proposal) -- confirm.
    pub memory64: bool,
    /// NOTE(review): presumably set when this memory is a shared
    /// memory (threads proposal) -- confirm.
    pub shared: bool,
    /// Optional base-2 logarithm of the page size.
    /// NOTE(review): presumably `None` means the default page size
    /// (`WASM_PAGE`) -- confirm.
    pub page_size_log2: Option<u32>,
}
128
/// One piece of initialization data for a memory (an element of
/// `MemoryData::segments`).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MemorySegment {
    /// The offset of this data.
    pub offset: usize,
    /// The data, overlaid on previously-existing data at this offset.
    pub data: Vec<u8>,
}
136
/// A table definition (an element of `Module::tables`).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TableData {
    /// The type of element in this table.
    pub ty: Type,
    /// The initial size of this table.
    /// NOTE(review): presumably counted in elements, like `max` --
    /// confirm.
    pub initial: u64,
    /// The maximum size (in elements), if any, of this table.
    pub max: Option<u64>,
    /// If this is a table of function references, the initial
    /// contents of the table. `null` funcrefs are represented by
    /// `Func::invalid()`.
    pub func_elements: Option<Vec<Func>>,
    /// NOTE(review): presumably set when this table uses 64-bit
    /// indices -- confirm.
    pub table64: bool,
}
150
/// A global variable definition (an element of `Module::globals`).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GlobalData {
    /// The type of this global variable.
    pub ty: Type,
    /// The initial value of this global variable, as a bundle of 64
    /// bits (all primitive types, `i32`/`i64`/`f32`/`f64`, can be
    /// represented in this way). May be absent (`None`).
    pub value: Option<u64>,
    /// Whether this global variable is mutable.
    pub mutable: bool,
}
162
163impl From<&wasmparser::SubType> for SignatureData {
164    fn from(fty: &wasmparser::SubType) -> Self {
165        match &fty.composite_type.inner {
166            wasmparser::CompositeInnerType::Func(func_type) => Self::Func {
167                params: func_type
168                    .params()
169                    .iter()
170                    .map(|&ty| ty.into())
171                    .collect::<Vec<Type>>(),
172                returns: func_type
173                    .results()
174                    .iter()
175                    .map(|&ty| ty.into())
176                    .collect::<Vec<Type>>(),
177                shared: fty.composite_type.shared,
178            },
179            wasmparser::CompositeInnerType::Array(array_type) => Self::Array {
180                ty: array_type.0.clone().into(),
181                shared: fty.composite_type.shared,
182            },
183            wasmparser::CompositeInnerType::Struct(struct_type) => Self::Struct {
184                fields: struct_type.fields.iter().map(|&ty| ty.into()).collect(),
185                shared: fty.composite_type.shared,
186            },
187            _ => todo!(),
188        }
189    }
190}
191impl From<wasmparser::SubType> for SignatureData {
192    fn from(fty: wasmparser::SubType) -> Self {
193        (&fty).into()
194    }
195}
196
197impl From<&SignatureData> for wasm_encoder::SubType {
198    fn from(value: &SignatureData) -> Self {
199        match value {
200            SignatureData::Func {
201                params,
202                returns,
203                shared,
204            } => wasm_encoder::SubType {
205                is_final: true,
206                supertype_idx: None,
207                composite_type: wasm_encoder::CompositeType {
208                    inner: wasm_encoder::CompositeInnerType::Func(wasm_encoder::FuncType::new(
209                        params.iter().cloned().map(|a| a.into()),
210                        returns.iter().cloned().map(|a| a.into()),
211                    )),
212                    shared: *shared,
213                },
214            },
215            SignatureData::None => todo!(),
216            SignatureData::Struct { fields, shared } => wasm_encoder::SubType {
217                is_final: true,
218                supertype_idx: None,
219                composite_type: wasm_encoder::CompositeType {
220                    inner: wasm_encoder::CompositeInnerType::Struct(wasm_encoder::StructType {
221                        fields: fields.iter().cloned().map(|a| a.into()).collect(),
222                    }),
223                    shared: *shared,
224                },
225            },
226            SignatureData::Array { ty, shared } => wasm_encoder::SubType {
227                is_final: true,
228                supertype_idx: None,
229                composite_type: wasm_encoder::CompositeType {
230                    inner: wasm_encoder::CompositeInnerType::Array(wasm_encoder::ArrayType(
231                        ty.clone().into(),
232                    )),
233                    shared: *shared,
234                },
235            },
236            SignatureData::Import { like, shared } => todo!(),
237        }
238    }
239}
240
241impl Signature {
242    pub fn is_backref(&self, module: &Module) -> bool {
243        return match &module.signatures[*self] {
244            SignatureData::Func {
245                params, returns, ..
246            } => params
247                .iter()
248                .chain(returns.iter())
249                .flat_map(|a| a.sigs())
250                .any(|sig| sig.index() >= self.index()),
251            _ => false,
252        };
253    }
254}
255
256impl Type {
257    pub fn sigs<'a>(&'a self) -> impl Iterator<Item = Signature> + 'a {
258        match self {
259            Type::Heap(h) => match &h.value {
260                HeapType::Sig { sig_index } => Either::Right(once(*sig_index)),
261                _ => Either::Left(empty()),
262            },
263            _ => Either::Left(empty()),
264        }
265    }
266}
267
/// A single import of a module (an element of `Module::imports`).
#[derive(Clone, Debug)]
pub struct Import {
    /// The name of the module the import comes from.
    pub module: String,
    /// The name of the export within that module that this import
    /// comes from.
    pub name: String,
    /// The kind of import and its specific entity index.
    pub kind: ImportKind,
}
278
/// What an `Import` brings in, together with the entity index it is
/// bound to. Has the same set of variants as `ExportKind`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum ImportKind {
    /// An import of a table.
    Table(Table),
    /// An import of a function.
    Func(Func),
    /// An import of a global.
    Global(Global),
    /// An import of a memory.
    Memory(Memory),
    /// An import of a control tag.
    ControlTag(ControlTag),
    /// An import of a type.
    Type(Signature),
}
295
296impl core::fmt::Display for ImportKind {
297    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
298        match self {
299            ImportKind::Table(table) => write!(f, "{}", table)?,
300            ImportKind::Func(func) => write!(f, "{}", func)?,
301            ImportKind::Global(global) => write!(f, "{}", global)?,
302            ImportKind::Memory(mem) => write!(f, "{}", mem)?,
303            ImportKind::ControlTag(control_tag) => write!(f, "{}", control_tag)?,
304            ImportKind::Type(t) => write!(f, "{}", t)?,
305        }
306        Ok(())
307    }
308}
309
/// A single export of a module (an element of `Module::exports`).
#[derive(Clone, Debug)]
pub struct Export {
    /// The name of this export.
    pub name: String,
    /// The kind of export and its specific entity index.
    pub kind: ExportKind,
}
317
/// What an `Export` exposes, together with the entity index it is
/// bound to. Has the same set of variants as `ImportKind`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum ExportKind {
    /// An export of a table.
    Table(Table),
    /// An export of a function.
    Func(Func),
    /// An export of a global.
    Global(Global),
    /// An export of a memory.
    Memory(Memory),
    /// An export of a control tag.
    ControlTag(ControlTag),
    /// An export of a type.
    Type(Signature),
}
334
335pub fn x2i(x: ExportKind) -> ImportKind {
336    match x {
337        ExportKind::Table(a) => ImportKind::Table(a),
338        ExportKind::Func(a) => ImportKind::Func(a),
339        ExportKind::Global(a) => ImportKind::Global(a),
340        ExportKind::Memory(a) => ImportKind::Memory(a),
341        ExportKind::ControlTag(control_tag) => ImportKind::ControlTag(control_tag),
342        ExportKind::Type(t) => ImportKind::Type(t),
343    }
344}
345pub fn i2x(x: ImportKind) -> ExportKind {
346    match x {
347        ImportKind::Table(a) => ExportKind::Table(a),
348        ImportKind::Func(a) => ExportKind::Func(a),
349        ImportKind::Global(a) => ExportKind::Global(a),
350        ImportKind::Memory(a) => ExportKind::Memory(a),
351        ImportKind::ControlTag(control_tag) => ExportKind::ControlTag(control_tag),
352        ImportKind::Type(t) => ExportKind::Type(t),
353    }
354}
355
356impl core::fmt::Display for ExportKind {
357    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
358        match self {
359            ExportKind::Table(table) => write!(f, "{}", table)?,
360            ExportKind::Func(func) => write!(f, "{}", func)?,
361            ExportKind::Global(global) => write!(f, "{}", global)?,
362            ExportKind::Memory(memory) => write!(f, "{}", memory)?,
363            ExportKind::ControlTag(control_tag) => write!(f, "{}", control_tag)?,
364            ExportKind::Type(t) => write!(f, "{}", t)?,
365        }
366        Ok(())
367    }
368}
369
370impl<'a> Module<'a> {
371    // pub(crate) fn with_orig_bytes(orig_bytes: &'a [u8]) -> Module<'a> {
372    //     Module {
373    //         orig_bytes,
374    //         funcs: EntityVec::default(),
375    //         signatures: EntityVec::default(),
376    //         globals: EntityVec::default(),
377    //         tables: EntityVec::default(),
378    //         imports: vec![],
379    //         exports: vec![],
380    //         memories: EntityVec::default(),
381    //         start_func: None,
382    //         debug: Debug::default(),
383    //         debug_map: DebugMap::default(),
384    //         custom_sections: BTreeMap::default(),
385    //     }
386    // }
387
388    pub fn empty() -> Module<'static> {
389        Module {
390            orig_bytes: None,
391            funcs: EntityVec::default(),
392            signatures: EntityVec::default(),
393            globals: EntityVec::default(),
394            tables: EntityVec::default(),
395            imports: vec![],
396            exports: vec![],
397            memories: EntityVec::default(),
398            start_func: None,
399            debug: Debug::default(),
400            debug_map: DebugMap::default(),
401            custom_sections: Default::default(),
402            control_tags: Default::default(),
403        }
404    }
405
406    /// Parse a WebAssembly module, as a slice of bytes in memory,
407    /// into a waffle Module ready to be manipulated and recompile.
408    pub fn from_wasm_bytes(bytes: &'a [u8], options: &FrontendOptions) -> Result<Self> {
409        frontend::wasm_to_ir(bytes, options)
410    }
411
412    /// Take this module and strip its reference to the original
413    /// bytes, producing a module with the same logical contents.
414    ///
415    /// Note that this has a few side-effects:
416    /// - Any (non-debug) custom sections are lost; i.e., they will
417    ///   not be roundtripped from the original Wasm module.
418    /// - All function bodies are expanded to IR so they can be
419    ///   recompiled into new bytecode. The bytecode should be
420    ///   equivalent, but will not literally be the same bytecode as the
421    ///   original module.
422    pub fn without_orig_bytes(self) -> Module<'static> {
423        Module {
424            orig_bytes: None,
425            funcs: EntityVec::from(
426                self.funcs
427                    .into_vec()
428                    .into_iter()
429                    .map(|decl| decl.without_orig_bytes())
430                    .collect::<Vec<_>>(),
431            ),
432            signatures: self.signatures,
433            globals: self.globals,
434            tables: self.tables,
435            imports: self.imports,
436            exports: self.exports,
437            memories: self.memories,
438            start_func: self.start_func,
439            debug: self.debug,
440            debug_map: self.debug_map,
441            custom_sections: self.custom_sections,
442            control_tags: self.control_tags,
443        }
444    }
445}
446
447impl<'a> Module<'a> {
448    // pub(crate) fn frontend_add_table(&mut self, ty: Type, initial: u64, max: Option<u64>) -> Table {
449    //     let func_elements = Some(vec![]);
450    //     self.tables.push(TableData {
451    //         ty,
452    //         func_elements,
453    //         initial,
454    //         max,
455    //     })
456    // }
457
458    // pub fn from_wasm_bytes(bytes: &'a [u8], options: &FrontendOptions) -> Result<Self> {
459    //     frontend::wasm_to_ir(bytes, options)
460    // }
461
462    pub fn to_wasm_bytes(&self) -> Result<Vec<u8>> {
463        backend::compile(self).map(|a| a.finish())
464    }
465    pub fn to_encoded_module(&self) -> Result<wasm_encoder::Module> {
466        backend::compile(self)
467    }
468
469    pub fn per_func_body<F: Fn(&mut FunctionBody)>(&mut self, f: F) {
470        for func_decl in self.funcs.values_mut() {
471            if let Some(body) = func_decl.body_mut() {
472                f(body);
473            }
474        }
475    }
476
477    pub fn take_per_func_body<F: FnMut(&mut Self, &mut FunctionBody)>(&mut self, mut f: F) {
478        for func_decl in self.funcs.iter().collect::<Vec<_>>() {
479            let mut x = core::mem::take(&mut self.funcs[func_decl]);
480            if let Some(body) = x.body_mut() {
481                f(self, body);
482            }
483            self.funcs[func_decl] = x;
484        }
485    }
486
487    pub fn try_per_func_body<F: FnMut(&mut FunctionBody) -> Result<(), E>, E>(
488        &mut self,
489        mut f: F,
490    ) -> Result<(), E> {
491        for func_decl in self.funcs.values_mut() {
492            if let Some(body) = func_decl.body_mut() {
493                f(body)?;
494            }
495        }
496        Ok(())
497    }
498
499    pub fn try_take_per_func_body<F: FnMut(&mut Self, &mut FunctionBody) -> Result<(), E>, E>(
500        &mut self,
501        mut f: F,
502    ) -> Result<(), E> {
503        for func_decl in self.funcs.iter().collect::<Vec<_>>() {
504            let mut x = core::mem::take(&mut self.funcs[func_decl]);
505            let mut y = None;
506            if let Some(body) = x.body_mut() {
507                y = Some(f(self, body));
508            }
509            self.funcs[func_decl] = x;
510            if let Some(z) = y {
511                z?;
512            }
513        }
514        Ok(())
515    }
516
517    /// Expand a function body, parsing its lazy reference to original
518    /// bytecode into IR if needed.
519    pub fn expand_func<'b>(&'b mut self, id: Func) -> Result<&'b mut FuncDecl<'a>> {
520        if let FuncDecl::Lazy(..) = self.funcs[id] {
521            // End the borrow. This is cheap (a slice copy).
522            let mut func = self.funcs[id].clone();
523            func.parse(self)?;
524            self.funcs[id] = func;
525        }
526        Ok(&mut self.funcs[id])
527    }
528
529    /// Clone a function body *without* expanding it, and return a
530    /// *new* function body with IR expanded. Useful when a tool
531    /// appends new functions that are processed versions of an
532    /// original function (which itself must remain as well).
533    pub fn clone_and_expand_body(&self, id: Func) -> Result<FunctionBody> {
534        let mut body = self.funcs[id].clone();
535        body.parse(self)?;
536        Ok(match body {
537            FuncDecl::Body(_, _, body) => body,
538            _ => unreachable!(),
539        })
540    }
541
542    /// For all functions that are lazy references to initial
543    /// bytecode, expand them into IR.
544    pub fn expand_all_funcs(&mut self) -> Result<()> {
545        for id in 0..self.funcs.len() {
546            let id = Func::new(id);
547            self.expand_func(id)?;
548        }
549        Ok(())
550    }
551
552    /// Return a wrapper that implements Display on this module,
553    /// pretty-printing it as textual IR.
554    pub fn display<'b>(&'b self) -> ModuleDisplay<'b>
555    where
556        'b: 'a,
557    {
558        ModuleDisplay { module: self }
559    }
560
561    /// Internal (used during parsing): create an empty module, with
562    /// the given slice of original Wasm bytecode. Used during parsing
563    /// and meant to be filled in as the Wasm bytecode is processed.
564    pub(crate) fn with_orig_bytes(orig_bytes: &'a [u8]) -> Module<'a> {
565        Module {
566            orig_bytes: Some(orig_bytes),
567            funcs: EntityVec::default(),
568            signatures: EntityVec::default(),
569            globals: EntityVec::default(),
570            tables: EntityVec::default(),
571            imports: vec![],
572            exports: vec![],
573            memories: EntityVec::default(),
574            start_func: None,
575            debug: Debug::default(),
576            debug_map: DebugMap::default(),
577            custom_sections: BTreeMap::default(),
578            control_tags: EntityVec::default(),
579        }
580    }
581}
#[impl_for_tuples(12)]
/// A sink that is handed function ids one at a time.
///
/// NOTE(review): the `impl_for_tuples(12)` attribute presumably also
/// generates implementations for tuples of collectors (up to 12
/// elements), each forwarding to its members -- confirm against the
/// `impl_trait_for_tuples` documentation.
pub trait FuncCollector {
    /// Receive the function id `f`.
    fn collect_func(&mut self, f: Func);
}
586impl<'a, T: FuncCollector> FuncCollector for &'a mut T {
587    fn collect_func(&mut self, f: Func) {
588        FuncCollector::collect_func(&mut **self, f);
589    }
590}
591
#[cfg(test)]
mod test {
    use super::*;

    /// An empty module must make it through the backend without error.
    #[test]
    fn empty_module_valid() {
        Module::empty().to_wasm_bytes().unwrap();
    }
}