javy_codegen/
lib.rs

1//! WebAssembly Code Generation for JavaScript
2//!
3//! This module provides functionality to emit Wasm modules which will run
4//! JavaScript source code with the QuickJS interpreter.
5//!
6//! Javy supports two main code generation paths:
7//!
8//! 1. Static code generation
9//! 2. Dynamic code generation
10//!
11//! ## Static code generation
12//!
13//! A single unit of code is generated, which is a Wasm module consisting of the
14//! bytecode representation of a given JavaScript program and the code for
15//! a particular version of the QuickJS engine compiled to Wasm.
16//!
//! The generated Wasm module is self-contained and the bytecode version matches
//! the exact requirements of the embedded QuickJS engine.
19//!
20//! ## Dynamic code generation
21//!
22//! A single unit of code is generated, which is a Wasm module consisting of the
23//! bytecode representation of a given JavaScript program. The JavaScript
24//! bytecode is stored as part of the data section of the module which also
25//! contains instructions to execute that bytecode through dynamic linking
26//! at runtime.
27//!
//! Dynamic code generation requires a plugin module to be used and linked
//! against at runtime in order to execute the JavaScript bytecode. This
//! operation involves carefully ensuring that a given plugin version matches
//! the plugin version of the imports requested by the generated Wasm module
//! as well as ensuring that any features available in the plugin match the
//! features requested by the JavaScript bytecode.
34//!
35//! ## Examples
36//!
37//! Simple Wasm module generation:
38//!
39//! ```no_run
40//! use std::path::Path;
41//! use javy_codegen::{Generator, LinkingKind, Plugin, JS};
42//!
43//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Load your target JavaScript.
45//!     let js = JS::from_file(Path::new("example.js"))?;
46//!
47//!     // Load existing pre-initialized Javy plugin.
48//!     let plugin = Plugin::new_from_path(Path::new("example-plugin.wasm"))?;
49//!
50//!     // Configure code generator.
51//!     let mut generator = Generator::new(plugin);
52//!     generator.linking(LinkingKind::Static);
53//!
54//!     // Generate your Wasm module.
55//!     let wasm = generator.generate(&js);
56//!
57//!     Ok(())
58//! }
59//! ```
60//!
61//! ## Core concepts
62//! * [`Generator`] - The main entry point for generating Wasm modules.
63//! * [`Plugin`] - An initialized Javy plugin.
64//! * [`JS`] - JavaScript source code.
65//!
66//! ## Features
67//!
//! * `plugin_internal` - Enables additional code generation options for
//!   internal use. Please note that this flag enables an unstable feature. The
//!   unstable APIs exposed by this feature may break in the future without
//!   notice.
72
73use std::{fs, rc::Rc, sync::OnceLock};
74
75pub(crate) mod bytecode;
76pub(crate) mod exports;
77pub(crate) mod transform;
78
79pub(crate) mod js;
80pub(crate) mod plugin;
81pub(crate) mod wit;
82
83pub use crate::js::JS;
84pub use crate::plugin::Plugin;
85pub use crate::wit::WitOptions;
86use crate::{exports::Exports, plugin::PluginKind};
87
88use transform::SourceCodeSection;
89use walrus::{
90    DataId, DataKind, ExportItem, FunctionBuilder, FunctionId, LocalId, MemoryId, Module, ValType,
91};
92use wasm_opt::{OptimizationOptions, ShrinkLevel};
93use wasmtime_wasi::{pipe::MemoryInputPipe, WasiCtxBuilder};
94use wizer::{Linker, Wizer};
95
96use anyhow::Result;
97
98static STDIN_PIPE: OnceLock<MemoryInputPipe> = OnceLock::new();
99
/// Selects how the generated module is linked.
#[derive(Debug, Clone, Default)]
pub enum LinkingKind {
    /// Static linking. This is the default.
    #[default]
    Static,
    /// Dynamic linking.
    Dynamic,
}
109
/// Controls whether and how the JavaScript source is embedded in the
/// generated Wasm module.
#[derive(Debug, Clone, Default)]
pub enum SourceEmbedding {
    /// Embed the source code without compression. This is the default.
    #[default]
    Uncompressed,
    /// Embed the source code with compression.
    Compressed,
    /// Don't embed the source code.
    Omitted,
}
121
122/// Identifiers used by the generated module.
123// This is an internal detail of this module.
124#[derive(Debug)]
125pub(crate) struct Identifiers {
126    cabi_realloc: FunctionId,
127    eval_bytecode: Option<FunctionId>,
128    invoke: FunctionId,
129    memory: MemoryId,
130}
131
132impl Identifiers {
133    fn new(
134        cabi_realloc: FunctionId,
135        eval_bytecode: Option<FunctionId>,
136        invoke: FunctionId,
137        memory: MemoryId,
138    ) -> Self {
139        Self {
140            cabi_realloc,
141            eval_bytecode,
142            invoke,
143            memory,
144        }
145    }
146}
147
148/// Helper struct to keep track of bytecode metadata.
149// This is an internal detail of this module.
150#[derive(Debug)]
151pub(crate) struct BytecodeMetadata {
152    ptr: LocalId,
153    len: i32,
154    data_section: DataId,
155}
156
157impl BytecodeMetadata {
158    fn new(ptr: LocalId, len: i32, data_section: DataId) -> Self {
159        Self {
160            ptr,
161            len,
162            data_section,
163        }
164    }
165}
166
167/// Generator used to produce Wasm binaries from JS source code.
168#[derive(Debug, Default, Clone)]
169pub struct Generator {
170    /// Plugin to use.
171    pub(crate) plugin: Plugin,
172    /// What kind of linking to use when generating a module.
173    pub(crate) linking: LinkingKind,
174    /// Source code embedding option for the generated module.
175    pub(crate) source_embedding: SourceEmbedding,
176    /// WIT options for code generation.
177    pub(crate) wit_opts: WitOptions,
178    /// JavaScript function exports.
179    pub(crate) function_exports: Exports,
180    /// The kind of plugin a generator will link.
181    plugin_kind: PluginKind,
182    /// An optional JS runtime config provided as JSON bytes.
183    js_runtime_config: Vec<u8>,
184    /// The version string to include in the producers custom section.
185    producer_version: Option<String>,
186}
187
188impl Generator {
189    /// Create a new [`Generator`].
190    pub fn new(plugin: Plugin) -> Self {
191        Self {
192            plugin,
193            ..Self::default()
194        }
195    }
196
197    /// Set the kind of linking (default: [`LinkingKind::Static`])
198    pub fn linking(&mut self, linking: LinkingKind) -> &mut Self {
199        self.linking = linking;
200        self
201    }
202
203    /// Set the source embedding option (default: [`SourceEmbedding::Compressed`])
204    pub fn source_embedding(&mut self, source_embedding: SourceEmbedding) -> &mut Self {
205        self.source_embedding = source_embedding;
206        self
207    }
208
209    /// Set the wit options. (default: Empty [`WitOptions`])
210    pub fn wit_opts(&mut self, wit_opts: wit::WitOptions) -> &mut Self {
211        self.wit_opts = wit_opts;
212        self
213    }
214
215    #[cfg(feature = "plugin_internal")]
216    /// Set true if linking with a V2 plugin module.
217    pub fn linking_v2_plugin(&mut self, value: bool) -> &mut Self {
218        self.plugin_kind = if value {
219            PluginKind::V2
220        } else {
221            PluginKind::User
222        };
223
224        self
225    }
226
227    #[cfg(feature = "plugin_internal")]
228    /// Set the JS runtime configuration options to pass to the module.
229    pub fn js_runtime_config(&mut self, js_runtime_config: Vec<u8>) -> &mut Self {
230        self.js_runtime_config = js_runtime_config;
231        self
232    }
233
234    /// Sets the version string to use in the producers custom section.
235    pub fn producer_version(&mut self, producer_version: String) -> &mut Self {
236        self.producer_version = Some(producer_version);
237        self
238    }
239}
240
241impl Generator {
242    /// Generate the starting module.
243    fn generate_initial_module(&self) -> Result<Module> {
244        let config = transform::module_config();
245        let module = match &self.linking {
246            LinkingKind::Static => {
247                // Copy config JSON into stdin for `initialize-runtime` function.
248                STDIN_PIPE
249                    .set(MemoryInputPipe::new(self.js_runtime_config.clone()))
250                    .unwrap();
251                let wasm = Wizer::new()
252                    .init_func("initialize-runtime")
253                    .make_linker(Some(Rc::new(move |engine| {
254                        let mut linker = Linker::new(engine);
255                        wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, move |cx| {
256                            if cx.wasi_ctx.is_none() {
257                                // The underlying buffer backing the pipe is an Arc
258                                // so the cloning should be fast.
259                                let config = STDIN_PIPE.get().unwrap().clone();
260                                cx.wasi_ctx = Some(
261                                    WasiCtxBuilder::new()
262                                        .stdin(config)
263                                        .inherit_stdout()
264                                        .inherit_stderr()
265                                        .build_p1(),
266                                );
267                            }
268                            cx.wasi_ctx.as_mut().unwrap()
269                        })?;
270                        Ok(linker)
271                    })))?
272                    .wasm_bulk_memory(true)
273                    .run(self.plugin.as_bytes())?;
274                config.parse(&wasm)?
275            }
276            LinkingKind::Dynamic => Module::with_config(config),
277        };
278        Ok(module)
279    }
280
281    /// Resolve identifiers for functions and memory.
282    pub(crate) fn resolve_identifiers(&self, module: &mut Module) -> Result<Identifiers> {
283        match self.linking {
284            LinkingKind::Static => {
285                let cabi_realloc = module
286                    .exports
287                    .get_func(self.plugin_kind.realloc_fn_name())?;
288                let invoke = module.exports.get_func("invoke")?;
289                let ExportItem::Memory(memory) = module
290                    .exports
291                    .iter()
292                    .find(|e| e.name == "memory")
293                    .ok_or_else(|| anyhow::anyhow!("Missing memory export"))?
294                    .item
295                else {
296                    anyhow::bail!("Export with name memory must be of type memory")
297                };
298                Ok(Identifiers::new(cabi_realloc, None, invoke, memory))
299            }
300            LinkingKind::Dynamic => {
301                // All code by default is assumed to be linking against a default
302                // or a user provided plugin. However V2 plugins require a different
303                // import namespace to be used instead so we use the plugin_kind to
304                // to determine the import_namespace.
305                let import_namespace = self.plugin_kind.import_namespace(&self.plugin)?;
306
307                let cabi_realloc_type = module.types.add(
308                    &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
309                    &[ValType::I32],
310                );
311                let (cabi_realloc_fn_id, _) = module.add_import_func(
312                    &import_namespace,
313                    self.plugin_kind.realloc_fn_name(),
314                    cabi_realloc_type,
315                );
316
317                // User plugins can use `invoke` with a null function name.
318                // Non-v2 plugins also won't have an `eval_bytecode` function to
319                // import.
320                let eval_bytecode_fn_id = if self.plugin_kind == PluginKind::V2 {
321                    let eval_bytecode_type = module.types.add(&[ValType::I32, ValType::I32], &[]);
322                    let (eval_bytecode_fn_id, _) = module.add_import_func(
323                        &import_namespace,
324                        "eval_bytecode",
325                        eval_bytecode_type,
326                    );
327                    Some(eval_bytecode_fn_id)
328                } else {
329                    None
330                };
331
332                let invoke_params = if self.plugin_kind == PluginKind::V2 {
333                    [ValType::I32, ValType::I32, ValType::I32, ValType::I32].as_slice()
334                } else {
335                    [
336                        ValType::I32,
337                        ValType::I32,
338                        ValType::I32,
339                        ValType::I32,
340                        ValType::I32,
341                    ]
342                    .as_slice()
343                };
344                let invoke_type = module.types.add(invoke_params, &[]);
345                let (invoke_fn_id, _) =
346                    module.add_import_func(&import_namespace, "invoke", invoke_type);
347
348                let (memory_id, _) = module.add_import_memory(
349                    &import_namespace,
350                    "memory",
351                    false,
352                    false,
353                    0,
354                    None,
355                    None,
356                );
357
358                Ok(Identifiers::new(
359                    cabi_realloc_fn_id,
360                    eval_bytecode_fn_id,
361                    invoke_fn_id,
362                    memory_id,
363                ))
364            }
365        }
366    }
367
368    /// Generate the main function.
369    fn generate_main(
370        &self,
371        module: &mut Module,
372        js: &js::JS,
373        imports: &Identifiers,
374    ) -> Result<BytecodeMetadata> {
375        let bytecode = bytecode::compile_source(&self.plugin, self.plugin_kind, js.as_bytes())?;
376        let bytecode_len: i32 = bytecode.len().try_into()?;
377        let bytecode_data = module.data.add(DataKind::Passive, bytecode);
378
379        let mut main = FunctionBuilder::new(&mut module.types, &[], &[]);
380        let bytecode_ptr_local = module.locals.add(ValType::I32);
381        let mut instructions = main.func_body();
382        instructions
383            // Allocate memory in plugin instance for bytecode array.
384            .i32_const(0) // orig ptr
385            .i32_const(0) // orig size
386            .i32_const(1) // alignment
387            .i32_const(bytecode_len) // new size
388            .call(imports.cabi_realloc)
389            // Copy bytecode array into allocated memory.
390            .local_tee(bytecode_ptr_local) // save returned address to local and set as dest addr for mem.init
391            .i32_const(0) // offset into data segment for mem.init
392            .i32_const(bytecode_len) // size to copy from data segment
393            // top-2: dest addr, top-1: offset into source, top-0: size of memory region in bytes.
394            .memory_init(imports.memory, bytecode_data);
395        // Evaluate top level scope.
396        if let Some(eval_bytecode) = imports.eval_bytecode {
397            instructions
398                .local_get(bytecode_ptr_local) // ptr to bytecode
399                .i32_const(bytecode_len)
400                .call(eval_bytecode);
401        } else {
402            // Assert we're not emitting a call with a null function to
403            // invoke for the v2 plugin. `javy_quickjs_provider_v2` will never
404            // support calling `invoke` with a null function. The default
405            // plugin and user plugins do accept null functions.
406            assert!(
407                self.plugin_kind != PluginKind::V2,
408                "Using invoke with null function not supported for v2 plugin"
409            );
410            instructions
411                .local_get(bytecode_ptr_local) // ptr to bytecode
412                .i32_const(bytecode_len)
413                .i32_const(0) // set option discriminator to none
414                .i32_const(0) // set function name ptr to null
415                .i32_const(0) // set function name len to 0
416                .call(imports.invoke);
417        }
418        let main = main.finish(vec![], &mut module.funcs);
419
420        module.exports.add("_start", main);
421        Ok(BytecodeMetadata::new(
422            bytecode_ptr_local,
423            bytecode_len,
424            bytecode_data,
425        ))
426    }
427
428    /// Generate function exports.
429    fn generate_exports(
430        &self,
431        module: &mut Module,
432        identifiers: &Identifiers,
433        bc_metadata: &BytecodeMetadata,
434    ) -> Result<()> {
435        if !self.function_exports.is_empty() {
436            let fn_name_ptr_local = module.locals.add(ValType::I32);
437            for export in &self.function_exports {
438                // For each JS function export, add an export that copies the name of the function into memory and invokes it.
439                let js_export_bytes = export.js.as_bytes();
440                let js_export_len: i32 = js_export_bytes.len().try_into().unwrap();
441                let fn_name_data = module.data.add(DataKind::Passive, js_export_bytes.to_vec());
442
443                let mut export_fn = FunctionBuilder::new(&mut module.types, &[], &[]);
444                export_fn
445                    .func_body()
446                    // Copy bytecode.
447                    .i32_const(0) // orig ptr
448                    .i32_const(0) // orig len
449                    .i32_const(1) // alignment
450                    .i32_const(bc_metadata.len) // size to copy
451                    .call(identifiers.cabi_realloc)
452                    .local_tee(bc_metadata.ptr)
453                    .i32_const(0) // offset into data segment
454                    .i32_const(bc_metadata.len) // size to copy
455                    .memory_init(identifiers.memory, bc_metadata.data_section) // copy bytecode into allocated memory
456                    .data_drop(bc_metadata.data_section)
457                    // Copy function name.
458                    .i32_const(0) // orig ptr
459                    .i32_const(0) // orig len
460                    .i32_const(1) // alignment
461                    .i32_const(js_export_len) // new size
462                    .call(identifiers.cabi_realloc)
463                    .local_tee(fn_name_ptr_local)
464                    .i32_const(0) // offset into data segment
465                    .i32_const(js_export_len) // size to copy
466                    .memory_init(identifiers.memory, fn_name_data) // copy fn name into allocated memory
467                    .data_drop(fn_name_data)
468                    // Call invoke.
469                    .local_get(bc_metadata.ptr)
470                    .i32_const(bc_metadata.len);
471
472                if self.plugin_kind != PluginKind::V2 {
473                    export_fn.func_body().i32_const(1); // set function name option discriminator to some
474                }
475
476                export_fn
477                    .func_body()
478                    .local_get(fn_name_ptr_local)
479                    .i32_const(js_export_len)
480                    .call(identifiers.invoke);
481                let export_fn = export_fn.finish(vec![], &mut module.funcs);
482                module.exports.add(&export.wit, export_fn);
483            }
484        }
485        Ok(())
486    }
487
488    /// Clean-up the generated Wasm.
489    fn postprocess(&self, module: &mut Module) -> Result<Vec<u8>> {
490        match self.linking {
491            LinkingKind::Static => {
492                // Remove no longer necessary exports.
493                module.exports.remove(self.plugin_kind.realloc_fn_name())?;
494
495                // Only v2 plugin exposes eval_bytecode function.
496                if self.plugin_kind == PluginKind::V2 {
497                    module.exports.remove("eval_bytecode")?;
498                }
499
500                module.exports.remove("invoke")?;
501                module.exports.remove(self.plugin_kind.compile_fn_name())?;
502
503                // Run wasm-opt to optimize.
504                let tempdir = tempfile::tempdir()?;
505                let tempfile_path = tempdir.path().join("temp.wasm");
506
507                module.emit_wasm_file(&tempfile_path)?;
508
509                OptimizationOptions::new_opt_level_3() // Aggressively optimize for speed.
510                    .shrink_level(ShrinkLevel::Level0) // Don't optimize for size at the expense of performance.
511                    .debug_info(false)
512                    .run(&tempfile_path, &tempfile_path)?;
513
514                Ok(fs::read(&tempfile_path)?)
515            }
516            LinkingKind::Dynamic => Ok(module.emit_wasm()),
517        }
518    }
519
520    /// Generate a Wasm module which will run the provided JS source code.
521    pub fn generate(&mut self, js: &js::JS) -> Result<Vec<u8>> {
522        if self.wit_opts.defined() {
523            self.function_exports = exports::process_exports(
524                js,
525                self.wit_opts.unwrap_path(),
526                self.wit_opts.unwrap_world(),
527            )?;
528        }
529
530        let mut module = self.generate_initial_module()?;
531        let identifiers = self.resolve_identifiers(&mut module)?;
532        let bc_metadata = self.generate_main(&mut module, js, &identifiers)?;
533        self.generate_exports(&mut module, &identifiers, &bc_metadata)?;
534
535        transform::add_producers_section(
536            &mut module.producers,
537            self.producer_version
538                .as_deref()
539                .unwrap_or(env!("CARGO_PKG_VERSION")),
540        );
541        match self.source_embedding {
542            SourceEmbedding::Omitted => {}
543            SourceEmbedding::Uncompressed => {
544                module.customs.add(SourceCodeSection::uncompressed(js)?);
545            }
546            SourceEmbedding::Compressed => {
547                module.customs.add(SourceCodeSection::compressed(js)?);
548            }
549        }
550
551        let wasm = self.postprocess(&mut module)?;
552        Ok(wasm)
553    }
554}