javy_codegen/
lib.rs

1//! WebAssembly Code Generation for JavaScript
2//!
3//! This module provides functionality to emit Wasm modules which will run
4//! JavaScript source code with the QuickJS interpreter.
5//!
6//! Javy supports two main code generation paths:
7//!
8//! 1. Static code generation
9//! 2. Dynamic code generation
10//!
11//! ## Static code generation
12//!
13//! A single unit of code is generated, which is a Wasm module consisting of the
14//! bytecode representation of a given JavaScript program and the code for
15//! a particular version of the QuickJS engine compiled to Wasm.
16//!
17//! The generated Wasm module is self contained and the bytecode version matches
18//! the exact requirements of the embedded QuickJS engine.
19//!
20//! ## Dynamic code generation
21//!
22//! A single unit of code is generated, which is a Wasm module consisting of the
23//! bytecode representation of a given JavaScript program. The JavaScript
24//! bytecode is stored as part of the data section of the module which also
25//! contains instructions to execute that bytecode through dynamic linking
26//! at runtime.
27//!
28//! Dynamic code generation requires a plugin module to be used and linked
29//! against at runtime in order to execute the JavaScript bytecode. This
30//! operation involves carefully ensuring that a given plugin version matches
31//! the plugin version of the imports requested by the generated Wasm module
32//! as well as ensuring that any features available in the plugin match the
33//! features requested by the JavaScript bytecode.
34//!
35//! ## Examples
36//!
37//! Simple Wasm module generation:
38//!
39//! ```no_run
40//! use std::path::Path;
41//! use javy_codegen::{Generator, LinkingKind, Plugin, JS};
42//!
43//! fn main() -> Result<(), Box<dyn std::error::Error>> {
44//!     // Load your target JavaScript.
45//!     let js = JS::from_file(Path::new("example.js"))?;
46//!
47//!     // Load existing pre-initialized Javy plugin.
48//!     let plugin = Plugin::new_from_path(Path::new("example-plugin.wasm"))?;
49//!
50//!     // Configure code generator.
51//!     let mut generator = Generator::new(plugin);
52//!     generator.linking(LinkingKind::Static);
53//!
54//!     // Generate your Wasm module.
55//!     let wasm = generator.generate(&js);
56//!
57//!     Ok(())
58//! }
59//! ```
60//!
61//! ## Core concepts
62//! * [`Generator`] - The main entry point for generating Wasm modules.
63//! * [`Plugin`] - An initialized Javy plugin.
64//! * [`JS`] - JavaScript source code.
65//!
66//! ## Features
67//!
68//! * `plugin_internal` - Enables additional code generation options for
69//!   internal use. Please note that this flag enables an unstable feature. The
70//!   unstable APIs exposed by this feature may break in the future without
71//!   notice.
72
73use std::{fs, rc::Rc, sync::OnceLock};
74
75pub(crate) mod bytecode;
76pub(crate) mod exports;
77pub(crate) mod transform;
78
79pub(crate) mod js;
80pub(crate) mod plugin;
81pub(crate) mod wit;
82
83pub use crate::js::JS;
84pub use crate::plugin::Plugin;
85pub use crate::wit::WitOptions;
86
87use transform::SourceCodeSection;
88use walrus::{
89    DataId, DataKind, ExportItem, FunctionBuilder, FunctionId, LocalId, MemoryId, Module, ValType,
90};
91use wasm_opt::{OptimizationOptions, ShrinkLevel};
92use wasmtime_wasi::{pipe::MemoryInputPipe, WasiCtxBuilder};
93use wizer::{Linker, Wizer};
94
95use anyhow::Result;
96
97static STDIN_PIPE: OnceLock<MemoryInputPipe> = OnceLock::new();
98
/// The kind of linking to use when generating a Wasm module.
///
/// The default is [`LinkingKind::Static`].
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum LinkingKind {
    /// Static linking: the generated module is produced from the plugin
    /// itself and is self contained.
    #[default]
    Static,
    /// Dynamic linking: the generated module imports the functions and
    /// memory it needs from a plugin that is linked at runtime.
    Dynamic,
}
108
109/// Identifiers used by the generated module.
110// This is an internal detail of this module.
111pub(crate) struct Identifiers {
112    canonical_abi_realloc: FunctionId,
113    eval_bytecode: Option<FunctionId>,
114    invoke: FunctionId,
115    memory: MemoryId,
116}
117
118impl Identifiers {
119    fn new(
120        canonical_abi_realloc: FunctionId,
121        eval_bytecode: Option<FunctionId>,
122        invoke: FunctionId,
123        memory: MemoryId,
124    ) -> Self {
125        Self {
126            canonical_abi_realloc,
127            eval_bytecode,
128            invoke,
129            memory,
130        }
131    }
132}
133
134/// Helper struct to keep track of bytecode metadata.
135// This is an internal detail of this module.
136pub(crate) struct BytecodeMetadata {
137    ptr: LocalId,
138    len: i32,
139    data_section: DataId,
140}
141
142impl BytecodeMetadata {
143    fn new(ptr: LocalId, len: i32, data_section: DataId) -> Self {
144        Self {
145            ptr,
146            len,
147            data_section,
148        }
149    }
150}
151
152/// Generator used to produce Wasm binaries from JS source code.
153#[derive(Default, Clone)]
154pub struct Generator {
155    /// Plugin to use.
156    pub(crate) plugin: plugin::Plugin,
157    /// What kind of linking to use when generating a module.
158    pub(crate) linking: LinkingKind,
159    /// Whether to embed the compressed JS source in the generated module.
160    pub(crate) source_compression: bool,
161    /// WIT options for code generation.
162    pub(crate) wit_opts: wit::WitOptions,
163    /// JavaScript function exports.
164    pub(crate) function_exports: exports::Exports,
165    /// The kind of plugin a generator will link.
166    plugin_kind: plugin::PluginKind,
167    /// An optional JS runtime config provided as JSON bytes.
168    js_runtime_config: Vec<u8>,
169}
170
171impl Generator {
172    /// Create a new [`Generator`].
173    pub fn new(plugin: Plugin) -> Self {
174        Self {
175            plugin,
176            ..Self::default()
177        }
178    }
179
180    /// Set the kind of linking (default: [`LinkingKind::Static`])
181    pub fn linking(&mut self, linking: LinkingKind) -> &mut Self {
182        self.linking = linking;
183        self
184    }
185
186    /// Set if JS source compression is enabled (default: false).
187    pub fn source_compression(&mut self, source_compression: bool) -> &mut Self {
188        self.source_compression = source_compression;
189        self
190    }
191
192    /// Set the wit options. (default: Empty [`WitOptions`])
193    pub fn wit_opts(&mut self, wit_opts: wit::WitOptions) -> &mut Self {
194        self.wit_opts = wit_opts;
195        self
196    }
197
198    #[cfg(feature = "plugin_internal")]
199    /// Set true if linking with a default plugin module.
200    pub fn linking_default_plugin(&mut self, value: bool) -> &mut Self {
201        self.plugin_kind = if value {
202            plugin::PluginKind::Default
203        } else {
204            plugin::PluginKind::User
205        };
206
207        self
208    }
209
210    #[cfg(feature = "plugin_internal")]
211    /// Set true if linking with a V2 plugin module.
212    pub fn linking_v2_plugin(&mut self, value: bool) -> &mut Self {
213        self.plugin_kind = if value {
214            plugin::PluginKind::V2
215        } else {
216            plugin::PluginKind::User
217        };
218
219        self
220    }
221
222    #[cfg(feature = "plugin_internal")]
223    /// Set the JS runtime configuration options to pass to the module.
224    pub fn js_runtime_config(&mut self, js_runtime_config: Vec<u8>) -> &mut Self {
225        self.js_runtime_config = js_runtime_config;
226        self
227    }
228}
229
230impl Generator {
231    /// Generate the starting module.
232    fn generate_initial_module(&self) -> Result<Module> {
233        let config = transform::module_config();
234        let module = match &self.linking {
235            LinkingKind::Static => {
236                // Copy config JSON into stdin for `initialize_runtime` function.
237                STDIN_PIPE
238                    .set(MemoryInputPipe::new(self.js_runtime_config.clone()))
239                    .unwrap();
240                let wasm = Wizer::new()
241                    .init_func("initialize_runtime")
242                    .make_linker(Some(Rc::new(move |engine| {
243                        let mut linker = Linker::new(engine);
244                        wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, move |cx| {
245                            if cx.wasi_ctx.is_none() {
246                                // The underlying buffer backing the pipe is an Arc
247                                // so the cloning should be fast.
248                                let config = STDIN_PIPE.get().unwrap().clone();
249                                cx.wasi_ctx = Some(
250                                    WasiCtxBuilder::new()
251                                        .stdin(config)
252                                        .inherit_stdout()
253                                        .inherit_stderr()
254                                        .build_p1(),
255                                );
256                            }
257                            cx.wasi_ctx.as_mut().unwrap()
258                        })?;
259                        Ok(linker)
260                    })))?
261                    .wasm_bulk_memory(true)
262                    .run(self.plugin.as_bytes())?;
263                config.parse(&wasm)?
264            }
265            LinkingKind::Dynamic => Module::with_config(config),
266        };
267        Ok(module)
268    }
269
270    /// Resolve identifiers for functions and memory.
271    pub(crate) fn resolve_identifiers(&self, module: &mut Module) -> Result<Identifiers> {
272        match self.linking {
273            LinkingKind::Static => {
274                let canonical_abi_realloc_fn = module.exports.get_func("canonical_abi_realloc")?;
275                let eval_bytecode = module.exports.get_func("eval_bytecode").ok();
276                let invoke = module.exports.get_func("invoke")?;
277                let ExportItem::Memory(memory) = module
278                    .exports
279                    .iter()
280                    .find(|e| e.name == "memory")
281                    .ok_or_else(|| anyhow::anyhow!("Missing memory export"))?
282                    .item
283                else {
284                    anyhow::bail!("Export with name memory must be of type memory")
285                };
286                Ok(Identifiers::new(
287                    canonical_abi_realloc_fn,
288                    eval_bytecode,
289                    invoke,
290                    memory,
291                ))
292            }
293            LinkingKind::Dynamic => {
294                // All code by default is assumed to be linking against a default
295                // or a user provided plugin. However V2 plugins require a different
296                // import namespace to be used instead so we use the plugin_kind to
297                // to determine the import_namespace.
298                let import_namespace = self.plugin_kind.import_namespace(&self.plugin)?;
299
300                let canonical_abi_realloc_type = module.types.add(
301                    &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
302                    &[ValType::I32],
303                );
304                let (canonical_abi_realloc_fn_id, _) = module.add_import_func(
305                    &import_namespace,
306                    "canonical_abi_realloc",
307                    canonical_abi_realloc_type,
308                );
309
310                // User plugins can use `invoke` with a null function name.
311                // User plugins also won't have an `eval_bytecode` function to
312                // import. We want to remove `eval_bytecode` from the default
313                // plugin so we don't want to emit more uses of it.
314                let eval_bytecode_fn_id = match self.plugin_kind {
315                    plugin::PluginKind::V2 => {
316                        let eval_bytecode_type =
317                            module.types.add(&[ValType::I32, ValType::I32], &[]);
318                        let (eval_bytecode_fn_id, _) = module.add_import_func(
319                            &import_namespace,
320                            "eval_bytecode",
321                            eval_bytecode_type,
322                        );
323                        Some(eval_bytecode_fn_id)
324                    }
325                    _ => None,
326                };
327
328                let invoke_type = module.types.add(
329                    &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
330                    &[],
331                );
332                let (invoke_fn_id, _) =
333                    module.add_import_func(&import_namespace, "invoke", invoke_type);
334
335                let (memory_id, _) = module.add_import_memory(
336                    &import_namespace,
337                    "memory",
338                    false,
339                    false,
340                    0,
341                    None,
342                    None,
343                );
344
345                Ok(Identifiers::new(
346                    canonical_abi_realloc_fn_id,
347                    eval_bytecode_fn_id,
348                    invoke_fn_id,
349                    memory_id,
350                ))
351            }
352        }
353    }
354
355    /// Generate the main function.
356    fn generate_main(
357        &self,
358        module: &mut Module,
359        js: &js::JS,
360        imports: &Identifiers,
361    ) -> Result<BytecodeMetadata> {
362        let bytecode = js.compile(&self.plugin)?;
363        let bytecode_len: i32 = bytecode.len().try_into()?;
364        let bytecode_data = module.data.add(DataKind::Passive, bytecode);
365
366        let mut main = FunctionBuilder::new(&mut module.types, &[], &[]);
367        let bytecode_ptr_local = module.locals.add(ValType::I32);
368        let mut instructions = main.func_body();
369        instructions
370            // Allocate memory in plugin instance for bytecode array.
371            .i32_const(0) // orig ptr
372            .i32_const(0) // orig size
373            .i32_const(1) // alignment
374            .i32_const(bytecode_len) // new size
375            .call(imports.canonical_abi_realloc)
376            // Copy bytecode array into allocated memory.
377            .local_tee(bytecode_ptr_local) // save returned address to local and set as dest addr for mem.init
378            .i32_const(0) // offset into data segment for mem.init
379            .i32_const(bytecode_len) // size to copy from data segment
380            // top-2: dest addr, top-1: offset into source, top-0: size of memory region in bytes.
381            .memory_init(imports.memory, bytecode_data);
382        // Evaluate top level scope.
383        if let Some(eval_bytecode) = imports.eval_bytecode {
384            instructions
385                .local_get(bytecode_ptr_local) // ptr to bytecode
386                .i32_const(bytecode_len)
387                .call(eval_bytecode);
388        } else {
389            // Assert we're not emitting a call with a null function to
390            // invoke for the v2 plugin. `javy_quickjs_provider_v2` will never
391            // support calling `invoke` with a null function. The default
392            // plugin and user plugins do accept null functions.
393            assert!(
394                !matches!(self.plugin_kind, plugin::PluginKind::V2),
395                "Using invoke with null function not supported for v2 plugin"
396            );
397            instructions
398                .local_get(bytecode_ptr_local) // ptr to bytecode
399                .i32_const(bytecode_len)
400                .i32_const(0) // set function name ptr to null
401                .i32_const(0) // set function name len to 0
402                .call(imports.invoke);
403        }
404        let main = main.finish(vec![], &mut module.funcs);
405
406        module.exports.add("_start", main);
407        Ok(BytecodeMetadata::new(
408            bytecode_ptr_local,
409            bytecode_len,
410            bytecode_data,
411        ))
412    }
413
414    /// Generate function exports.
415    fn generate_exports(
416        &self,
417        module: &mut Module,
418        identifiers: &Identifiers,
419        bc_metadata: &BytecodeMetadata,
420    ) -> Result<()> {
421        if !self.function_exports.is_empty() {
422            let fn_name_ptr_local = module.locals.add(ValType::I32);
423            for export in &self.function_exports {
424                // For each JS function export, add an export that copies the name of the function into memory and invokes it.
425                let js_export_bytes = export.js.as_bytes();
426                let js_export_len: i32 = js_export_bytes.len().try_into().unwrap();
427                let fn_name_data = module.data.add(DataKind::Passive, js_export_bytes.to_vec());
428
429                let mut export_fn = FunctionBuilder::new(&mut module.types, &[], &[]);
430                export_fn
431                    .func_body()
432                    // Copy bytecode.
433                    .i32_const(0) // orig ptr
434                    .i32_const(0) // orig len
435                    .i32_const(1) // alignment
436                    .i32_const(bc_metadata.len) // size to copy
437                    .call(identifiers.canonical_abi_realloc)
438                    .local_tee(bc_metadata.ptr)
439                    .i32_const(0) // offset into data segment
440                    .i32_const(bc_metadata.len) // size to copy
441                    .memory_init(identifiers.memory, bc_metadata.data_section) // copy bytecode into allocated memory
442                    .data_drop(bc_metadata.data_section)
443                    // Copy function name.
444                    .i32_const(0) // orig ptr
445                    .i32_const(0) // orig len
446                    .i32_const(1) // alignment
447                    .i32_const(js_export_len) // new size
448                    .call(identifiers.canonical_abi_realloc)
449                    .local_tee(fn_name_ptr_local)
450                    .i32_const(0) // offset into data segment
451                    .i32_const(js_export_len) // size to copy
452                    .memory_init(identifiers.memory, fn_name_data) // copy fn name into allocated memory
453                    .data_drop(fn_name_data)
454                    // Call invoke.
455                    .local_get(bc_metadata.ptr)
456                    .i32_const(bc_metadata.len)
457                    .local_get(fn_name_ptr_local)
458                    .i32_const(js_export_len)
459                    .call(identifiers.invoke);
460                let export_fn = export_fn.finish(vec![], &mut module.funcs);
461                module.exports.add(&export.wit, export_fn);
462            }
463        }
464        Ok(())
465    }
466
467    /// Clean-up the generated Wasm.
468    fn postprocess(&self, module: &mut Module) -> Result<Vec<u8>> {
469        match self.linking {
470            LinkingKind::Static => {
471                // Remove no longer necessary exports.
472                module.exports.remove("canonical_abi_realloc")?;
473
474                // Only internal plugins expose eval_bytecode function.
475                if matches!(
476                    self.plugin_kind,
477                    plugin::PluginKind::Default | plugin::PluginKind::V2
478                ) {
479                    module.exports.remove("eval_bytecode")?;
480                }
481
482                module.exports.remove("invoke")?;
483                module.exports.remove("compile_src")?;
484
485                // Run wasm-opt to optimize.
486                let tempdir = tempfile::tempdir()?;
487                let tempfile_path = tempdir.path().join("temp.wasm");
488
489                module.emit_wasm_file(&tempfile_path)?;
490
491                OptimizationOptions::new_opt_level_3() // Aggressively optimize for speed.
492                    .shrink_level(ShrinkLevel::Level0) // Don't optimize for size at the expense of performance.
493                    .debug_info(false)
494                    .run(&tempfile_path, &tempfile_path)?;
495
496                Ok(fs::read(&tempfile_path)?)
497            }
498            LinkingKind::Dynamic => Ok(module.emit_wasm()),
499        }
500    }
501
502    // For the example generated WAT, the `bytecode_len` is 137
503    // (module
504    //    (type (;0;) (func))
505    //    (type (;1;) (func (param i32 i32)))
506    //    (type (;2;) (func (param i32 i32 i32 i32)))
507    //    (type (;3;) (func (param i32 i32 i32 i32) (result i32)))
508    //    (import "javy_quickjs_provider_v2" "canonical_abi_realloc" (func (;0;) (type 3)))
509    //    (import "javy_quickjs_provider_v2" "eval_bytecode" (func (;1;) (type 1)))
510    //    (import "javy_quickjs_provider_v2" "memory" (memory (;0;) 0))
511    //    (import "javy_quickjs_provider_v2" "invoke" (func (;2;) (type 2)))
512    //    (func (;3;) (type 0)
513    //      (local i32 i32)
514    //      i32.const 0
515    //      i32.const 0
516    //      i32.const 1
517    //      i32.const 137
518    //      call 0
519    //      local.tee 0
520    //      i32.const 0
521    //      i32.const 137
522    //      memory.init 0
523    //      data.drop 0
524    //      i32.const 0
525    //      i32.const 0
526    //      i32.const 1
527    //      i32.const 3
528    //      call 0
529    //      local.tee 1
530    //      i32.const 0
531    //      i32.const 3
532    //      memory.init 1
533    //      data.drop 1
534    //      local.get 0
535    //      i32.const 137
536    //      local.get 1
537    //      i32.const 3
538    //      call 2
539    //    )
540    //    (func (;4;) (type 0)
541    //      (local i32)
542    //      i32.const 0
543    //      i32.const 0
544    //      i32.const 1
545    //      i32.const 137
546    //      call 0
547    //      local.tee 0
548    //      i32.const 0
549    //      i32.const 137
550    //      memory.init 0
551    //      local.get 0
552    //      i32.const 137
553    //      call 1
554    //    )
555    //    (export "_start" (func 4))
556    //    (export "foo" (func 3))
557    //    (data (;0;) "\02\05\18function.mjs\06foo\0econsole\06log\06bar\0f\bc\03\00\01\00\00\be\03\00\00\0e\00\06\01\a0\01\00\00\00\03\01\01\1a\00\be\03\00\01\08\ea\05\c0\00\e1)8\e0\00\00\00B\e1\00\00\00\04\e2\00\00\00$\01\00)\bc\03\01\04\01\00\07\0a\0eC\06\01\be\03\00\00\00\03\00\00\13\008\e0\00\00\00B\e1\00\00\00\04\df\00\00\00$\01\00)\bc\03\01\02\03]")
558    //    (data (;1;) "foo")
559    //  )
560    /// Generate a Wasm module which will run the provided JS source code.
561    pub fn generate(&mut self, js: &js::JS) -> Result<Vec<u8>> {
562        if self.wit_opts.defined() {
563            self.function_exports = exports::process_exports(
564                js,
565                self.wit_opts.unwrap_path(),
566                self.wit_opts.unwrap_world(),
567            )?;
568        }
569
570        let mut module = self.generate_initial_module()?;
571        let identifiers = self.resolve_identifiers(&mut module)?;
572        let bc_metadata = self.generate_main(&mut module, js, &identifiers)?;
573        self.generate_exports(&mut module, &identifiers, &bc_metadata)?;
574
575        transform::add_producers_section(&mut module.producers);
576        if !self.source_compression {
577            module.customs.add(SourceCodeSection::uncompressed(js)?);
578        } else {
579            module.customs.add(SourceCodeSection::compressed(js)?);
580        }
581
582        let wasm = self.postprocess(&mut module)?;
583        Ok(wasm)
584    }
585}