wit_component_update/
metadata.rs

1//! Definition for encoding of custom sections within core wasm modules of
2//! component-model related data.
3//!
4//! When creating a component from a source language the high-level process for
5//! doing this is that code will be generated into the source language by
6//! `wit-bindgen` or a similar tool which will be compiled down to core wasm.
7//! The core wasm file is then fed into `wit-component` and a component is
8//! created. This means that the componentization process is decoupled from the
9//! binding generation process and intentionally affords for linking together
10//! libraries into the main core wasm module that import different interfaces.
11//!
12//! The purpose of this module is to define an intermediate format to reside in
13//! a custom section in the core wasm output. This intermediate format is
14//! carried through the wasm linker through a custom section whose name starts
15//! with `component-type`. This custom section is created
16//! per-language-binding-generation and consumed by slurping up all the
17//! sections during the component creation process.
18//!
19//! Currently the encoding of this custom section is itself a component. The
20//! component has a single export which is a component type which represents the
21//! `world` that was bound during bindings generation. This single export is
22//! used to decode back into a `Resolve` with a WIT representation.
23//!
24//! Currently the component additionally has a custom section named
25//! `wit-component-encoding` (see `CUSTOM_SECTION_NAME`). This section is
26//! currently defined as 2 bytes:
27//!
28//! * The first byte is `CURRENT_VERSION` to help protect against future and
29//!   past changes.
30//! * The second byte indicates the string encoding used for imports/exports as
31//!   part of the bindings process. The mapping is defined by
32//!   `encode_string_encoding`.
33//!
34//! This means that the top-level `encode` function takes a `Resolve`, a
35//! `WorldId`, and a `StringEncoding`. Note that the top-level `decode` function
//! is slightly different because it's taking all custom sections in a core
37//! wasm binary, possibly from multiple invocations of bindgen, and unioning
38//! them all together. This means that the output is a `Bindgen` which
39//! represents the union of all previous bindings.
40//!
//! The dual of `encode` is the `decode_custom_section` function which decodes
42//! the three arguments originally passed to `encode`.
43
44use crate::validation::BARE_FUNC_MODULE_NAME;
45use crate::{DecodedWasm, StringEncoding};
46use anyhow::{bail, Context, Result};
47use indexmap::IndexMap;
48use std::borrow::Cow;
49use wasm_encoder::{
50    ComponentBuilder, ComponentExportKind, ComponentType, ComponentTypeRef, CustomSection,
51};
52use wasm_metadata::Producers;
53use wasmparser::{BinaryReader, Encoding, Parser, Payload};
54use wit_parser::{Package, PackageName, Resolve, World, WorldId, WorldItem};
55
/// Format version written by `encode` as the first byte of the
/// `wit-component-encoding` custom section and required to match when that
/// section is read back by `decode_custom_section`.
const CURRENT_VERSION: u8 = 0x04;
/// Name of the custom section, embedded in the encoded component, that holds
/// the version byte followed by the string-encoding byte.
const CUSTOM_SECTION_NAME: &str = "wit-component-encoding";
58
/// The result of decoding binding information from a WebAssembly binary.
///
/// This structure is returned by [`decode`] and represents the interface of a
/// WebAssembly binary. When several `component-type*` custom sections are
/// present, [`decode`] folds each of them into one value via
/// [`Bindgen::merge`].
pub struct Bindgen {
    /// Interface and type information for this binary.
    pub resolve: Resolve,
    /// The world that was bound. This id indexes into `resolve`.
    pub world: WorldId,
    /// Metadata about this specific module that was bound (per-function
    /// string encodings for imports and exports).
    pub metadata: ModuleMetadata,
    /// Producer information about tools used to produce this specific module.
    /// `None` when no producer information has been recorded.
    pub producers: Option<Producers>,
}
73
74impl Default for Bindgen {
75    fn default() -> Bindgen {
76        let mut resolve = Resolve::default();
77        let package = resolve.packages.alloc(Package {
78            name: PackageName {
79                namespace: "root".to_string(),
80                name: "root".to_string(),
81                version: None,
82            },
83            docs: Default::default(),
84            interfaces: Default::default(),
85            worlds: Default::default(),
86        });
87        let world = resolve.worlds.alloc(World {
88            name: "root".to_string(),
89            docs: Default::default(),
90            imports: Default::default(),
91            exports: Default::default(),
92            includes: Default::default(),
93            include_names: Default::default(),
94            package: Some(package),
95        });
96        resolve.packages[package]
97            .worlds
98            .insert("root".to_string(), world);
99        Bindgen {
100            resolve,
101            world,
102            metadata: ModuleMetadata::default(),
103            producers: None,
104        }
105    }
106}
107
/// Module-level metadata that's specific to one core WebAssembly module. This
/// is extracted with a [`Bindgen`].
///
/// Both maps are filled in by [`ModuleMetadata::new`] and combined across
/// custom sections by [`Bindgen::merge`].
#[derive(Default)]
pub struct ModuleMetadata {
    /// Per-function options imported into the core wasm module, currently only
    /// related to string encoding.
    ///
    /// Keys are `(module, function)` pairs. Functions imported directly by the
    /// world use `BARE_FUNC_MODULE_NAME` as the module; functions of an
    /// imported interface use the interface's world-key name as the module.
    pub import_encodings: IndexMap<(String, String), StringEncoding>,

    /// Per-function options exported from the core wasm module, currently only
    /// related to string encoding.
    ///
    /// Keys are the names produced by `core_export_name` for each exported
    /// function.
    pub export_encodings: IndexMap<String, StringEncoding>,
}
120
121/// This function will parse the `wasm` binary given as input and return a
122/// [`Bindgen`] which extracts the custom sections describing component-level
123/// types from within the binary itself.
124///
125/// This is used to parse the output of `wit-bindgen`-generated modules and is
126/// one of the earliest phases in transitioning such a module to a component.
127/// The extraction here provides the metadata necessary to continue the process
128/// later on.
129///
130/// This will return an error if `wasm` is not a valid WebAssembly module.
131///
132/// Note that a "stripped" binary where `component-type` sections are removed
133/// is returned as well to embed within a component.
134pub fn decode(wasm: &[u8]) -> Result<(Vec<u8>, Bindgen)> {
135    let mut ret = Bindgen::default();
136    let mut new_module = wasm_encoder::Module::new();
137
138    for payload in wasmparser::Parser::new(0).parse_all(wasm) {
139        let payload = payload.context("decoding item in module")?;
140        match payload {
141            wasmparser::Payload::CustomSection(cs) if cs.name().starts_with("component-type") => {
142                let data = Bindgen::decode_custom_section(cs.data())
143                    .with_context(|| format!("decoding custom section {}", cs.name()))?;
144                ret.merge(data)
145                    .with_context(|| format!("updating metadata for section {}", cs.name()))?;
146            }
147            wasmparser::Payload::Version { encoding, .. } if encoding != Encoding::Module => {
148                bail!("decoding a component is not supported")
149            }
150            _ => {
151                if let Some((id, range)) = payload.as_section() {
152                    new_module.section(&wasm_encoder::RawSection {
153                        id,
154                        data: &wasm[range],
155                    });
156                }
157            }
158        }
159    }
160
161    Ok((new_module.finish(), ret))
162}
163
164/// Creates a `component-type*` custom section to be decoded by `decode` above.
165///
166/// This is primarily created by wit-bindgen-based guest generators to embed
167/// into the final core wasm binary. The core wasm binary is later fed
168/// through `wit-component` to produce the actual component where this returned
169/// section will be decoded.
170pub fn encode(
171    resolve: &Resolve,
172    world: WorldId,
173    string_encoding: StringEncoding,
174    extra_producers: Option<&Producers>,
175) -> Result<Vec<u8>> {
176    let ty = crate::encoding::encode_world(resolve, world)?;
177
178    let world = &resolve.worlds[world];
179    let mut outer_ty = ComponentType::new();
180    outer_ty.ty().component(&ty);
181    outer_ty.export(
182        &resolve.id_of_name(world.package.unwrap(), &world.name),
183        ComponentTypeRef::Component(0),
184    );
185
186    let mut builder = ComponentBuilder::default();
187
188    let string_encoding = encode_string_encoding(string_encoding);
189    builder.custom_section(&CustomSection {
190        name: CUSTOM_SECTION_NAME.into(),
191        data: Cow::Borrowed(&[CURRENT_VERSION, string_encoding]),
192    });
193
194    let ty = builder.type_component(&outer_ty);
195    builder.export(&world.name, ComponentExportKind::Type, ty, None);
196
197    let mut producers = crate::base_producers();
198    if let Some(p) = extra_producers {
199        producers.merge(&p);
200    }
201    builder.raw_custom_section(&producers.raw_custom_section());
202    Ok(builder.finish())
203}
204
205fn decode_custom_section(wasm: &[u8]) -> Result<(Resolve, WorldId, StringEncoding)> {
206    let (resolve, world) = wit_parser::decoding::decode_world(wasm)?;
207    let mut custom_section = None;
208
209    for payload in Parser::new(0).parse_all(wasm) {
210        match payload? {
211            Payload::CustomSection(s) if s.name() == CUSTOM_SECTION_NAME => {
212                custom_section = Some(s.data());
213            }
214            _ => {}
215        }
216    }
217    let string_encoding = match custom_section {
218        None => bail!("missing custom section of name `{CUSTOM_SECTION_NAME}`"),
219        Some([CURRENT_VERSION, byte]) => decode_string_encoding(*byte)?,
220        Some([]) => bail!("custom section `{CUSTOM_SECTION_NAME}` in unknown format"),
221        Some([version, ..]) => bail!(
222            "custom section `{CUSTOM_SECTION_NAME}` uses format {version} but only {CURRENT_VERSION} is supported"
223        ),
224    };
225    Ok((resolve, world, string_encoding))
226}
227
228fn encode_string_encoding(e: StringEncoding) -> u8 {
229    match e {
230        StringEncoding::UTF8 => 0x00,
231        StringEncoding::UTF16 => 0x01,
232        StringEncoding::CompactUTF16 => 0x02,
233    }
234}
235
236fn decode_string_encoding(byte: u8) -> Result<StringEncoding> {
237    match byte {
238        0x00 => Ok(StringEncoding::UTF8),
239        0x01 => Ok(StringEncoding::UTF16),
240        0x02 => Ok(StringEncoding::CompactUTF16),
241        byte => bail!("invalid string encoding {byte:#x}"),
242    }
243}
244
245impl Bindgen {
246    fn decode_custom_section(data: &[u8]) -> Result<Bindgen> {
247        let wasm;
248        let world;
249        let resolve;
250        let encoding;
251
252        let mut reader = BinaryReader::new(data);
253        match reader.read_u8()? {
254            // Historical 0x03 format where the support here will be deleted in
255            // the future
256            0x03 => {
257                encoding = decode_string_encoding(reader.read_u8()?)?;
258                let world_name = reader.read_string()?;
259                wasm = &data[reader.original_position()..];
260
261                let (r, pkg) = match crate::decode(wasm)? {
262                    DecodedWasm::WitPackage(resolve, pkg) => (resolve, pkg),
263                    DecodedWasm::Component(..) => bail!("expected an encoded wit package"),
264                };
265                resolve = r;
266                world = resolve.packages[pkg].worlds[world_name];
267            }
268
269            // Current format where `data` is a wasm component itself.
270            _ => {
271                wasm = data;
272                (resolve, world, encoding) = decode_custom_section(wasm)?;
273            }
274        }
275
276        Ok(Bindgen {
277            metadata: ModuleMetadata::new(&resolve, world, encoding),
278            producers: wasm_metadata::Producers::from_wasm(wasm)?,
279            resolve,
280            world,
281        })
282    }
283
284    /// Merges another `BindgenMetadata` into this one.
285    ///
286    /// This operation is intended to be akin to "merging worlds" when the
287    /// abstraction level for that is what we're working at here. For now the
288    /// merge operation only succeeds if the two metadata descriptions are
289    /// entirely disjoint.
290    ///
291    /// Note that at this time there's no support for changing string encodings
292    /// between metadata.
293    pub fn merge(&mut self, other: Bindgen) -> Result<WorldId> {
294        let Bindgen {
295            resolve,
296            world,
297            metadata:
298                ModuleMetadata {
299                    import_encodings,
300                    export_encodings,
301                },
302            producers,
303        } = other;
304
305        let world = self
306            .resolve
307            .merge(resolve)
308            .context("failed to merge WIT package sets together")?
309            .worlds[world.index()];
310        self.resolve
311            .merge_worlds(world, self.world)
312            .context("failed to merge worlds from two documents")?;
313
314        for (name, encoding) in export_encodings {
315            let prev = self
316                .metadata
317                .export_encodings
318                .insert(name.clone(), encoding);
319            if let Some(prev) = prev {
320                if prev != encoding {
321                    bail!("conflicting string encodings specified for export `{name}`");
322                }
323            }
324        }
325        for ((module, name), encoding) in import_encodings {
326            let prev = self
327                .metadata
328                .import_encodings
329                .insert((module.clone(), name.clone()), encoding);
330            if let Some(prev) = prev {
331                if prev != encoding {
332                    bail!("conflicting string encodings specified for import `{module}::{name}`");
333                }
334            }
335        }
336        if let Some(producers) = producers {
337            if let Some(mine) = &mut self.producers {
338                mine.merge(&producers);
339            } else {
340                self.producers = Some(producers);
341            }
342        }
343
344        Ok(world)
345    }
346}
347
348impl ModuleMetadata {
349    /// Creates a new `ModuleMetadata` instance holding the given set of
350    /// interfaces which are expected to all use the `encoding` specified.
351    pub fn new(resolve: &Resolve, world: WorldId, encoding: StringEncoding) -> ModuleMetadata {
352        let mut ret = ModuleMetadata::default();
353
354        let world = &resolve.worlds[world];
355        for (name, item) in world.imports.iter() {
356            let name = resolve.name_world_key(name);
357            match item {
358                WorldItem::Function(_) => {
359                    let prev = ret
360                        .import_encodings
361                        .insert((BARE_FUNC_MODULE_NAME.to_string(), name.clone()), encoding);
362                    assert!(prev.is_none());
363                }
364                WorldItem::Interface(i) => {
365                    for (func, _) in resolve.interfaces[*i].functions.iter() {
366                        let prev = ret
367                            .import_encodings
368                            .insert((name.clone(), func.clone()), encoding);
369                        assert!(prev.is_none());
370                    }
371                }
372                WorldItem::Type(_) => {}
373            }
374        }
375
376        for (name, item) in world.exports.iter() {
377            let name = resolve.name_world_key(name);
378            match item {
379                WorldItem::Function(func) => {
380                    let name = func.core_export_name(None).into_owned();
381                    let prev = ret.export_encodings.insert(name.clone(), encoding);
382                    assert!(prev.is_none());
383                }
384                WorldItem::Interface(i) => {
385                    for (_, func) in resolve.interfaces[*i].functions.iter() {
386                        let name = func.core_export_name(Some(&name)).into_owned();
387                        let prev = ret.export_encodings.insert(name, encoding);
388                        assert!(prev.is_none());
389                    }
390                }
391                WorldItem::Type(_) => {}
392            }
393        }
394
395        ret
396    }
397}