javy_codegen/
lib.rs

1//! WebAssembly Code Generation for JavaScript
2//!
3//! This module provides functionality to emit Wasm modules which will run
4//! JavaScript source code with the QuickJS interpreter.
5//!
6//! Javy supports two main code generation paths:
7//!
8//! 1. Static code generation
9//! 2. Dynamic code generation
10//!
11//! ## Static code generation
12//!
13//! A single unit of code is generated, which is a Wasm module consisting of the
14//! bytecode representation of a given JavaScript program and the code for
15//! a particular version of the QuickJS engine compiled to Wasm.
16//!
//! The generated Wasm module is self-contained and the bytecode version matches
//! the exact requirements of the embedded QuickJS engine.
19//!
20//! ## Dynamic code generation
21//!
22//! A single unit of code is generated, which is a Wasm module consisting of the
23//! bytecode representation of a given JavaScript program. The JavaScript
24//! bytecode is stored as part of the data section of the module which also
25//! contains instructions to execute that bytecode through dynamic linking
26//! at runtime.
27//!
28//! Dynamic code generation requires a plugin module to be used and linked
29//! against at runtime in order to execute the JavaScript bytecode. This
30//! operation involves carefully ensuring that a given plugin version matches
31//! the plugin version of the imports requested by the generated Wasm module
32//! as well as ensuring that any features available in the plugin match the
//! features requested by the JavaScript bytecode.
34//!
35//! ## Examples
36//!
37//! Simple Wasm module generation:
38//!
39//! ```no_run
40//! use std::path::Path;
41//! use javy_codegen::{Generator, LinkingKind, Plugin, JS};
42//!
43//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Load your target JavaScript.
45//!     let js = JS::from_file(Path::new("example.js"))?;
46//!
47//!     // Load existing pre-initialized Javy plugin.
48//!     let plugin = Plugin::new_from_path(Path::new("example-plugin.wasm"))?;
49//!
50//!     // Configure code generator.
51//!     let mut generator = Generator::new(plugin);
52//!     generator.linking(LinkingKind::Static);
53//!
54//!     // Generate your Wasm module.
//!     let wasm = generator.generate(&js)?;
56//!
57//!     Ok(())
58//! }
59//! ```
60//!
61//! ## Core concepts
62//! * [`Generator`] - The main entry point for generating Wasm modules.
63//! * [`Plugin`] - An initialized Javy plugin.
64//! * [`JS`] - JavaScript source code.
65//!
66//! ## Features
67//!
//! * `plugin_internal` - Enables additional code generation options for
//!   internal use. Please note that this flag enables an unstable feature. The
//!   unstable APIs exposed by this feature may break in the future without
//!   notice.
72
73use std::{fs, rc::Rc, sync::OnceLock};
74
75pub(crate) mod bytecode;
76pub(crate) mod exports;
77pub(crate) mod transform;
78
79pub(crate) mod js;
80pub(crate) mod plugin;
81pub(crate) mod wit;
82
83use crate::exports::Exports;
84pub use crate::js::JS;
85pub use crate::plugin::Plugin;
86pub use crate::wit::WitOptions;
87
88use transform::SourceCodeSection;
89use walrus::{
90    DataId, DataKind, ExportItem, FunctionBuilder, FunctionId, LocalId, MemoryId, Module, ValType,
91};
92use wasm_opt::{OptimizationOptions, ShrinkLevel};
93use wasmtime_wasi::{WasiCtxBuilder, p2::pipe::MemoryInputPipe};
94use wizer::{Linker, Wizer};
95
96use anyhow::Result;
97
/// Process-wide holder for the in-memory stdin pipe that carries the JS
/// runtime config. It is set during static code generation and cloned inside
/// the WASI linker callback that builds the WASI context.
static STDIN_PIPE: OnceLock<MemoryInputPipe> = OnceLock::new();
99
/// How the generated module is linked to the plugin that runs the JavaScript.
#[derive(Clone, Debug, Default)]
pub enum LinkingKind {
    /// Static linking: the generated module is built from the pre-initialized
    /// plugin itself. This is the default.
    #[default]
    Static,
    /// Dynamic linking: the generated module imports what it needs from the
    /// plugin.
    Dynamic,
}
109
/// Controls whether, and in what form, the JavaScript source is embedded in
/// the generated Wasm module.
#[derive(Clone, Debug, Default)]
pub enum SourceEmbedding {
    /// Embed the source code without compression. This is the default.
    #[default]
    Uncompressed,
    /// Embed the source code with compression.
    Compressed,
    /// Don't embed the source code.
    Omitted,
}
121
122/// Identifiers used by the generated module.
123// This is an internal detail of this module.
124#[derive(Debug)]
125pub(crate) struct Identifiers {
126    cabi_realloc: FunctionId,
127    invoke: FunctionId,
128    memory: MemoryId,
129}
130
131impl Identifiers {
132    fn new(cabi_realloc: FunctionId, invoke: FunctionId, memory: MemoryId) -> Self {
133        Self {
134            cabi_realloc,
135            invoke,
136            memory,
137        }
138    }
139}
140
141/// Helper struct to keep track of bytecode metadata.
142// This is an internal detail of this module.
143#[derive(Debug)]
144pub(crate) struct BytecodeMetadata {
145    ptr: LocalId,
146    len: i32,
147    data_section: DataId,
148}
149
150impl BytecodeMetadata {
151    fn new(ptr: LocalId, len: i32, data_section: DataId) -> Self {
152        Self {
153            ptr,
154            len,
155            data_section,
156        }
157    }
158}
159
/// Generator used to produce Wasm binaries from JS source code.
///
/// Create one with [`Generator::new`], adjust it through the builder-style
/// setters, and then call [`Generator::generate`].
#[derive(Debug, Default, Clone)]
pub struct Generator {
    /// Plugin to use.
    pub(crate) plugin: Plugin,
    /// What kind of linking to use when generating a module.
    pub(crate) linking: LinkingKind,
    /// Source code embedding option for the generated module.
    pub(crate) source_embedding: SourceEmbedding,
    /// WIT options for code generation.
    pub(crate) wit_opts: WitOptions,
    /// JavaScript function exports. Replaced on every call to
    /// [`Generator::generate`] when WIT options are defined.
    pub(crate) function_exports: Exports,
    /// JS runtime config provided as JSON bytes (empty when not set). With
    /// static linking these bytes are made available on stdin while the
    /// plugin's `initialize-runtime` function runs.
    js_runtime_config: Vec<u8>,
    /// The version string to include in the producers custom section. When
    /// `None`, this crate's `CARGO_PKG_VERSION` is used.
    producer_version: Option<String>,
}
178
179impl Generator {
180    /// Create a new [`Generator`].
181    pub fn new(plugin: Plugin) -> Self {
182        Self {
183            plugin,
184            ..Self::default()
185        }
186    }
187
188    /// Set the kind of linking (default: [`LinkingKind::Static`])
189    pub fn linking(&mut self, linking: LinkingKind) -> &mut Self {
190        self.linking = linking;
191        self
192    }
193
194    /// Set the source embedding option (default: [`SourceEmbedding::Compressed`])
195    pub fn source_embedding(&mut self, source_embedding: SourceEmbedding) -> &mut Self {
196        self.source_embedding = source_embedding;
197        self
198    }
199
200    /// Set the wit options. (default: Empty [`WitOptions`])
201    pub fn wit_opts(&mut self, wit_opts: wit::WitOptions) -> &mut Self {
202        self.wit_opts = wit_opts;
203        self
204    }
205
206    #[cfg(feature = "plugin_internal")]
207    /// Set the JS runtime configuration options to pass to the module.
208    pub fn js_runtime_config(&mut self, js_runtime_config: Vec<u8>) -> &mut Self {
209        self.js_runtime_config = js_runtime_config;
210        self
211    }
212
213    /// Sets the version string to use in the producers custom section.
214    pub fn producer_version(&mut self, producer_version: String) -> &mut Self {
215        self.producer_version = Some(producer_version);
216        self
217    }
218}
219
220impl Generator {
221    /// Generate the starting module.
222    fn generate_initial_module(&self) -> Result<Module> {
223        let config = transform::module_config();
224        let module = match &self.linking {
225            LinkingKind::Static => {
226                // Copy config JSON into stdin for `initialize-runtime` function.
227                STDIN_PIPE
228                    .set(MemoryInputPipe::new(self.js_runtime_config.clone()))
229                    .unwrap();
230                let wasm = Wizer::new()
231                    .init_func("initialize-runtime")
232                    .make_linker(Some(Rc::new(move |engine| {
233                        let mut linker = Linker::new(engine);
234                        wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, move |cx| {
235                            if cx.wasi_ctx.is_none() {
236                                // The underlying buffer backing the pipe is an Arc
237                                // so the cloning should be fast.
238                                let config = STDIN_PIPE.get().unwrap().clone();
239                                cx.wasi_ctx = Some(
240                                    WasiCtxBuilder::new()
241                                        .stdin(config)
242                                        .inherit_stdout()
243                                        .inherit_stderr()
244                                        .build_p1(),
245                                );
246                            }
247                            cx.wasi_ctx.as_mut().unwrap()
248                        })?;
249                        Ok(linker)
250                    })))?
251                    .wasm_bulk_memory(true)
252                    .run(self.plugin.as_bytes())?;
253                config.parse(&wasm)?
254            }
255            LinkingKind::Dynamic => Module::with_config(config),
256        };
257        Ok(module)
258    }
259
260    /// Resolve identifiers for functions and memory.
261    pub(crate) fn resolve_identifiers(&self, module: &mut Module) -> Result<Identifiers> {
262        match self.linking {
263            LinkingKind::Static => {
264                let cabi_realloc = module.exports.get_func("cabi_realloc")?;
265                let invoke = module.exports.get_func("invoke")?;
266                let ExportItem::Memory(memory) = module
267                    .exports
268                    .iter()
269                    .find(|e| e.name == "memory")
270                    .ok_or_else(|| anyhow::anyhow!("Missing memory export"))?
271                    .item
272                else {
273                    anyhow::bail!("Export with name memory must be of type memory")
274                };
275                Ok(Identifiers::new(cabi_realloc, invoke, memory))
276            }
277            LinkingKind::Dynamic => {
278                // All code by default is assumed to be linking against a default
279                // or a user provided plugin.
280                let import_namespace = self.plugin.import_namespace()?;
281
282                let cabi_realloc_type = module.types.add(
283                    &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
284                    &[ValType::I32],
285                );
286                let (cabi_realloc_fn_id, _) =
287                    module.add_import_func(&import_namespace, "cabi_realloc", cabi_realloc_type);
288
289                let invoke_params = [
290                    ValType::I32,
291                    ValType::I32,
292                    ValType::I32,
293                    ValType::I32,
294                    ValType::I32,
295                ]
296                .as_slice();
297                let invoke_type = module.types.add(invoke_params, &[]);
298                let (invoke_fn_id, _) =
299                    module.add_import_func(&import_namespace, "invoke", invoke_type);
300
301                let (memory_id, _) = module.add_import_memory(
302                    &import_namespace,
303                    "memory",
304                    false,
305                    false,
306                    0,
307                    None,
308                    None,
309                );
310
311                Ok(Identifiers::new(
312                    cabi_realloc_fn_id,
313                    invoke_fn_id,
314                    memory_id,
315                ))
316            }
317        }
318    }
319
320    /// Generate the main function.
321    fn generate_main(
322        &self,
323        module: &mut Module,
324        js: &js::JS,
325        imports: &Identifiers,
326    ) -> Result<BytecodeMetadata> {
327        let bytecode = bytecode::compile_source(&self.plugin, js.as_bytes())?;
328        let bytecode_len: i32 = bytecode.len().try_into()?;
329        let bytecode_data = module.data.add(DataKind::Passive, bytecode);
330
331        let mut main = FunctionBuilder::new(&mut module.types, &[], &[]);
332        let bytecode_ptr_local = module.locals.add(ValType::I32);
333        let mut instructions = main.func_body();
334        instructions
335            // Allocate memory in plugin instance for bytecode array.
336            .i32_const(0) // orig ptr
337            .i32_const(0) // orig size
338            .i32_const(1) // alignment
339            .i32_const(bytecode_len) // new size
340            .call(imports.cabi_realloc)
341            // Copy bytecode array into allocated memory.
342            .local_tee(bytecode_ptr_local) // save returned address to local and set as dest addr for mem.init
343            .i32_const(0) // offset into data segment for mem.init
344            .i32_const(bytecode_len) // size to copy from data segment
345            // top-2: dest addr, top-1: offset into source, top-0: size of memory region in bytes.
346            .memory_init(imports.memory, bytecode_data);
347        // Evaluate top level scope.
348        instructions
349            .local_get(bytecode_ptr_local) // ptr to bytecode
350            .i32_const(bytecode_len)
351            .i32_const(0) // set option discriminator to none
352            .i32_const(0) // set function name ptr to null
353            .i32_const(0) // set function name len to 0
354            .call(imports.invoke);
355        let main = main.finish(vec![], &mut module.funcs);
356
357        module.exports.add("_start", main);
358        Ok(BytecodeMetadata::new(
359            bytecode_ptr_local,
360            bytecode_len,
361            bytecode_data,
362        ))
363    }
364
365    /// Generate function exports.
366    fn generate_exports(
367        &self,
368        module: &mut Module,
369        identifiers: &Identifiers,
370        bc_metadata: &BytecodeMetadata,
371    ) -> Result<()> {
372        if !self.function_exports.is_empty() {
373            let fn_name_ptr_local = module.locals.add(ValType::I32);
374            for export in &self.function_exports {
375                // For each JS function export, add an export that copies the name of the function into memory and invokes it.
376                let js_export_bytes = export.js.as_bytes();
377                let js_export_len: i32 = js_export_bytes.len().try_into().unwrap();
378                let fn_name_data = module.data.add(DataKind::Passive, js_export_bytes.to_vec());
379
380                let mut export_fn = FunctionBuilder::new(&mut module.types, &[], &[]);
381                export_fn
382                    .func_body()
383                    // Copy bytecode.
384                    .i32_const(0) // orig ptr
385                    .i32_const(0) // orig len
386                    .i32_const(1) // alignment
387                    .i32_const(bc_metadata.len) // size to copy
388                    .call(identifiers.cabi_realloc)
389                    .local_tee(bc_metadata.ptr)
390                    .i32_const(0) // offset into data segment
391                    .i32_const(bc_metadata.len) // size to copy
392                    .memory_init(identifiers.memory, bc_metadata.data_section) // copy bytecode into allocated memory
393                    .data_drop(bc_metadata.data_section)
394                    // Copy function name.
395                    .i32_const(0) // orig ptr
396                    .i32_const(0) // orig len
397                    .i32_const(1) // alignment
398                    .i32_const(js_export_len) // new size
399                    .call(identifiers.cabi_realloc)
400                    .local_tee(fn_name_ptr_local)
401                    .i32_const(0) // offset into data segment
402                    .i32_const(js_export_len) // size to copy
403                    .memory_init(identifiers.memory, fn_name_data) // copy fn name into allocated memory
404                    .data_drop(fn_name_data)
405                    // Call invoke.
406                    .local_get(bc_metadata.ptr)
407                    .i32_const(bc_metadata.len)
408                    .i32_const(1) // set function name option discriminator to some
409                    .local_get(fn_name_ptr_local)
410                    .i32_const(js_export_len)
411                    .call(identifiers.invoke);
412                let export_fn = export_fn.finish(vec![], &mut module.funcs);
413                module.exports.add(&export.wit, export_fn);
414            }
415        }
416        Ok(())
417    }
418
419    /// Clean-up the generated Wasm.
420    fn postprocess(&self, module: &mut Module) -> Result<Vec<u8>> {
421        match self.linking {
422            LinkingKind::Static => {
423                // Remove no longer necessary exports.
424                module.exports.remove("invoke")?;
425                module.exports.remove("compile-src")?;
426
427                // Run wasm-opt to optimize.
428                let tempdir = tempfile::tempdir()?;
429                let tempfile_path = tempdir.path().join("temp.wasm");
430
431                module.emit_wasm_file(&tempfile_path)?;
432
433                OptimizationOptions::new_opt_level_3() // Aggressively optimize for speed.
434                    .shrink_level(ShrinkLevel::Level0) // Don't optimize for size at the expense of performance.
435                    .debug_info(false)
436                    .run(&tempfile_path, &tempfile_path)?;
437
438                Ok(fs::read(&tempfile_path)?)
439            }
440            LinkingKind::Dynamic => Ok(module.emit_wasm()),
441        }
442    }
443
444    /// Generate a Wasm module which will run the provided JS source code.
445    pub fn generate(&mut self, js: &js::JS) -> Result<Vec<u8>> {
446        if self.wit_opts.defined() {
447            self.function_exports = exports::process_exports(
448                js,
449                self.wit_opts.unwrap_path(),
450                self.wit_opts.unwrap_world(),
451            )?;
452        }
453
454        let mut module = self.generate_initial_module()?;
455        let identifiers = self.resolve_identifiers(&mut module)?;
456        let bc_metadata = self.generate_main(&mut module, js, &identifiers)?;
457        self.generate_exports(&mut module, &identifiers, &bc_metadata)?;
458
459        transform::add_producers_section(
460            &mut module.producers,
461            self.producer_version
462                .as_deref()
463                .unwrap_or(env!("CARGO_PKG_VERSION")),
464        );
465        match self.source_embedding {
466            SourceEmbedding::Omitted => {}
467            SourceEmbedding::Uncompressed => {
468                module.customs.add(SourceCodeSection::uncompressed(js)?);
469            }
470            SourceEmbedding::Compressed => {
471                module.customs.add(SourceCodeSection::compressed(js)?);
472            }
473        }
474
475        let wasm = self.postprocess(&mut module)?;
476        Ok(wasm)
477    }
478}