javy_codegen/lib.rs
1//! WebAssembly Code Generation for JavaScript
2//!
3//! This module provides functionality to emit Wasm modules which will run
4//! JavaScript source code with the QuickJS interpreter.
5//!
6//! Javy supports two main code generation paths:
7//!
8//! 1. Static code generation
9//! 2. Dynamic code generation
10//!
11//! ## Static code generation
12//!
13//! A single unit of code is generated, which is a Wasm module consisting of the
14//! bytecode representation of a given JavaScript program and the code for
15//! a particular version of the QuickJS engine compiled to Wasm.
16//!
17//! The generated Wasm module is self-contained and the bytecode version matches
18//! the exact requirements of the embedded QuickJS engine.
19//!
20//! ## Dynamic code generation
21//!
22//! A single unit of code is generated, which is a Wasm module consisting of the
23//! bytecode representation of a given JavaScript program. The JavaScript
24//! bytecode is stored as part of the data section of the module which also
25//! contains instructions to execute that bytecode through dynamic linking
26//! at runtime.
27//!
28//! Dynamic code generation requires a plugin module to be used and linked
29//! against at runtime in order to execute the JavaScript bytecode. This
30//! operation involves carefully ensuring that a given plugin version matches
31//! the plugin version of the imports requested by the generated Wasm module
32//! as well as ensuring that any features available in the plugin match the
33//! features requested by the JavaScript bytecode.
34//!
35//! ## Examples
36//!
37//! Simple Wasm module generation:
38//!
39//! ```no_run
40//! use std::path::Path;
41//! use javy_codegen::{Generator, LinkingKind, Plugin, JS};
42//!
43//! fn main() -> Result<(), Box<dyn std::error::Error>> {
44//! // Load your target JavaScript.
45//! let js = JS::from_file(Path::new("example.js"))?;
46//!
47//! // Load existing pre-initialized Javy plugin.
48//! let plugin = Plugin::new_from_path(Path::new("example-plugin.wasm"))?;
49//!
50//! // Configure code generator.
51//! let mut generator = Generator::new(plugin);
52//! generator.linking(LinkingKind::Static);
53//!
54//! // Generate your Wasm module.
55//! let wasm = generator.generate(&js);
56//!
57//! Ok(())
58//! }
59//! ```
60//!
61//! ## Core concepts
62//! * [`Generator`] - The main entry point for generating Wasm modules.
63//! * [`Plugin`] - An initialized Javy plugin.
64//! * [`JS`] - JavaScript source code.
65//!
66//! ## Features
67//!
68//! * `plugin_internal` - Enables additional code generation options for
69//! internal use. Please note that this flag enables an unstable feature. The
70//! unstable APIs exposed by this feature may break in the future without
71//! notice.
72
73use std::{fs, rc::Rc, sync::OnceLock};
74
75pub(crate) mod bytecode;
76pub(crate) mod exports;
77pub(crate) mod transform;
78
79pub(crate) mod js;
80pub(crate) mod plugin;
81pub(crate) mod wit;
82
83pub use crate::js::JS;
84pub use crate::plugin::Plugin;
85pub use crate::wit::WitOptions;
86
87use transform::SourceCodeSection;
88use walrus::{
89 DataId, DataKind, ExportItem, FunctionBuilder, FunctionId, LocalId, MemoryId, Module, ValType,
90};
91use wasm_opt::{OptimizationOptions, ShrinkLevel};
92use wasmtime_wasi::{pipe::MemoryInputPipe, WasiCtxBuilder};
93use wizer::{Linker, Wizer};
94
95use anyhow::Result;
96
// Process-wide stdin pipe carrying the JS runtime config. It is populated in
// `generate_initial_module` when linking statically and cloned into the WASI
// context the first time the linker closure needs one.
97static STDIN_PIPE: OnceLock<MemoryInputPipe> = OnceLock::new();
98
/// Selects how the generated module is linked with the plugin.
#[derive(Default, Clone)]
pub enum LinkingKind {
    /// Static linking: the pre-initialized plugin is used as the starting
    /// point of the generated module. This is the default.
    #[default]
    Static,
    /// Dynamic linking: the generated module imports what it needs from the
    /// plugin instead of embedding it.
    Dynamic,
}
108
109/// Identifiers used by the generated module.
110// This is an internal detail of this module.
111pub(crate) struct Identifiers {
112 canonical_abi_realloc: FunctionId,
113 eval_bytecode: Option<FunctionId>,
114 invoke: FunctionId,
115 memory: MemoryId,
116}
117
118impl Identifiers {
119 fn new(
120 canonical_abi_realloc: FunctionId,
121 eval_bytecode: Option<FunctionId>,
122 invoke: FunctionId,
123 memory: MemoryId,
124 ) -> Self {
125 Self {
126 canonical_abi_realloc,
127 eval_bytecode,
128 invoke,
129 memory,
130 }
131 }
132}
133
134/// Helper struct to keep track of bytecode metadata.
135// This is an internal detail of this module.
136pub(crate) struct BytecodeMetadata {
137 ptr: LocalId,
138 len: i32,
139 data_section: DataId,
140}
141
142impl BytecodeMetadata {
143 fn new(ptr: LocalId, len: i32, data_section: DataId) -> Self {
144 Self {
145 ptr,
146 len,
147 data_section,
148 }
149 }
150}
151
152/// Generator used to produce Wasm binaries from JS source code.
153#[derive(Default, Clone)]
154pub struct Generator {
155 /// Plugin to use.
156 pub(crate) plugin: plugin::Plugin,
157 /// What kind of linking to use when generating a module.
158 pub(crate) linking: LinkingKind,
159 /// Whether to embed the compressed JS source in the generated module.
160 pub(crate) source_compression: bool,
161 /// WIT options for code generation.
162 pub(crate) wit_opts: wit::WitOptions,
163 /// JavaScript function exports.
164 pub(crate) function_exports: exports::Exports,
165 /// The kind of plugin a generator will link.
166 plugin_kind: plugin::PluginKind,
167 /// An optional JS runtime config provided as JSON bytes.
168 js_runtime_config: Vec<u8>,
169}
170
171impl Generator {
172 /// Create a new [`Generator`].
173 pub fn new(plugin: Plugin) -> Self {
174 Self {
175 plugin,
176 ..Self::default()
177 }
178 }
179
180 /// Set the kind of linking (default: [`LinkingKind::Static`])
181 pub fn linking(&mut self, linking: LinkingKind) -> &mut Self {
182 self.linking = linking;
183 self
184 }
185
186 /// Set if JS source compression is enabled (default: false).
187 pub fn source_compression(&mut self, source_compression: bool) -> &mut Self {
188 self.source_compression = source_compression;
189 self
190 }
191
192 /// Set the wit options. (default: Empty [`WitOptions`])
193 pub fn wit_opts(&mut self, wit_opts: wit::WitOptions) -> &mut Self {
194 self.wit_opts = wit_opts;
195 self
196 }
197
198 #[cfg(feature = "plugin_internal")]
199 /// Set true if linking with a default plugin module.
200 pub fn linking_default_plugin(&mut self, value: bool) -> &mut Self {
201 self.plugin_kind = if value {
202 plugin::PluginKind::Default
203 } else {
204 plugin::PluginKind::User
205 };
206
207 self
208 }
209
210 #[cfg(feature = "plugin_internal")]
211 /// Set true if linking with a V2 plugin module.
212 pub fn linking_v2_plugin(&mut self, value: bool) -> &mut Self {
213 self.plugin_kind = if value {
214 plugin::PluginKind::V2
215 } else {
216 plugin::PluginKind::User
217 };
218
219 self
220 }
221
222 #[cfg(feature = "plugin_internal")]
223 /// Set the JS runtime configuration options to pass to the module.
224 pub fn js_runtime_config(&mut self, js_runtime_config: Vec<u8>) -> &mut Self {
225 self.js_runtime_config = js_runtime_config;
226 self
227 }
228}
229
230impl Generator {
231 /// Generate the starting module.
232 fn generate_initial_module(&self) -> Result<Module> {
233 let config = transform::module_config();
234 let module = match &self.linking {
235 LinkingKind::Static => {
236 // Copy config JSON into stdin for `initialize_runtime` function.
237 STDIN_PIPE
238 .set(MemoryInputPipe::new(self.js_runtime_config.clone()))
239 .unwrap();
240 let wasm = Wizer::new()
241 .init_func("initialize_runtime")
242 .make_linker(Some(Rc::new(move |engine| {
243 let mut linker = Linker::new(engine);
244 wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, move |cx| {
245 if cx.wasi_ctx.is_none() {
246 // The underlying buffer backing the pipe is an Arc
247 // so the cloning should be fast.
248 let config = STDIN_PIPE.get().unwrap().clone();
249 cx.wasi_ctx = Some(
250 WasiCtxBuilder::new()
251 .stdin(config)
252 .inherit_stdout()
253 .inherit_stderr()
254 .build_p1(),
255 );
256 }
257 cx.wasi_ctx.as_mut().unwrap()
258 })?;
259 Ok(linker)
260 })))?
261 .wasm_bulk_memory(true)
262 .run(self.plugin.as_bytes())?;
263 config.parse(&wasm)?
264 }
265 LinkingKind::Dynamic => Module::with_config(config),
266 };
267 Ok(module)
268 }
269
270 /// Resolve identifiers for functions and memory.
271 pub(crate) fn resolve_identifiers(&self, module: &mut Module) -> Result<Identifiers> {
272 match self.linking {
273 LinkingKind::Static => {
274 let canonical_abi_realloc_fn = module.exports.get_func("canonical_abi_realloc")?;
275 let eval_bytecode = module.exports.get_func("eval_bytecode").ok();
276 let invoke = module.exports.get_func("invoke")?;
277 let ExportItem::Memory(memory) = module
278 .exports
279 .iter()
280 .find(|e| e.name == "memory")
281 .ok_or_else(|| anyhow::anyhow!("Missing memory export"))?
282 .item
283 else {
284 anyhow::bail!("Export with name memory must be of type memory")
285 };
286 Ok(Identifiers::new(
287 canonical_abi_realloc_fn,
288 eval_bytecode,
289 invoke,
290 memory,
291 ))
292 }
293 LinkingKind::Dynamic => {
294 // All code by default is assumed to be linking against a default
295 // or a user provided plugin. However V2 plugins require a different
296 // import namespace to be used instead so we use the plugin_kind to
297 // to determine the import_namespace.
298 let import_namespace = self.plugin_kind.import_namespace(&self.plugin)?;
299
300 let canonical_abi_realloc_type = module.types.add(
301 &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
302 &[ValType::I32],
303 );
304 let (canonical_abi_realloc_fn_id, _) = module.add_import_func(
305 &import_namespace,
306 "canonical_abi_realloc",
307 canonical_abi_realloc_type,
308 );
309
310 // User plugins can use `invoke` with a null function name.
311 // User plugins also won't have an `eval_bytecode` function to
312 // import. We want to remove `eval_bytecode` from the default
313 // plugin so we don't want to emit more uses of it.
314 let eval_bytecode_fn_id = match self.plugin_kind {
315 plugin::PluginKind::V2 => {
316 let eval_bytecode_type =
317 module.types.add(&[ValType::I32, ValType::I32], &[]);
318 let (eval_bytecode_fn_id, _) = module.add_import_func(
319 &import_namespace,
320 "eval_bytecode",
321 eval_bytecode_type,
322 );
323 Some(eval_bytecode_fn_id)
324 }
325 _ => None,
326 };
327
328 let invoke_type = module.types.add(
329 &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
330 &[],
331 );
332 let (invoke_fn_id, _) =
333 module.add_import_func(&import_namespace, "invoke", invoke_type);
334
335 let (memory_id, _) = module.add_import_memory(
336 &import_namespace,
337 "memory",
338 false,
339 false,
340 0,
341 None,
342 None,
343 );
344
345 Ok(Identifiers::new(
346 canonical_abi_realloc_fn_id,
347 eval_bytecode_fn_id,
348 invoke_fn_id,
349 memory_id,
350 ))
351 }
352 }
353 }
354
355 /// Generate the main function.
356 fn generate_main(
357 &self,
358 module: &mut Module,
359 js: &js::JS,
360 imports: &Identifiers,
361 ) -> Result<BytecodeMetadata> {
362 let bytecode = js.compile(&self.plugin)?;
363 let bytecode_len: i32 = bytecode.len().try_into()?;
364 let bytecode_data = module.data.add(DataKind::Passive, bytecode);
365
366 let mut main = FunctionBuilder::new(&mut module.types, &[], &[]);
367 let bytecode_ptr_local = module.locals.add(ValType::I32);
368 let mut instructions = main.func_body();
369 instructions
370 // Allocate memory in plugin instance for bytecode array.
371 .i32_const(0) // orig ptr
372 .i32_const(0) // orig size
373 .i32_const(1) // alignment
374 .i32_const(bytecode_len) // new size
375 .call(imports.canonical_abi_realloc)
376 // Copy bytecode array into allocated memory.
377 .local_tee(bytecode_ptr_local) // save returned address to local and set as dest addr for mem.init
378 .i32_const(0) // offset into data segment for mem.init
379 .i32_const(bytecode_len) // size to copy from data segment
380 // top-2: dest addr, top-1: offset into source, top-0: size of memory region in bytes.
381 .memory_init(imports.memory, bytecode_data);
382 // Evaluate top level scope.
383 if let Some(eval_bytecode) = imports.eval_bytecode {
384 instructions
385 .local_get(bytecode_ptr_local) // ptr to bytecode
386 .i32_const(bytecode_len)
387 .call(eval_bytecode);
388 } else {
389 // Assert we're not emitting a call with a null function to
390 // invoke for the v2 plugin. `javy_quickjs_provider_v2` will never
391 // support calling `invoke` with a null function. The default
392 // plugin and user plugins do accept null functions.
393 assert!(
394 !matches!(self.plugin_kind, plugin::PluginKind::V2),
395 "Using invoke with null function not supported for v2 plugin"
396 );
397 instructions
398 .local_get(bytecode_ptr_local) // ptr to bytecode
399 .i32_const(bytecode_len)
400 .i32_const(0) // set function name ptr to null
401 .i32_const(0) // set function name len to 0
402 .call(imports.invoke);
403 }
404 let main = main.finish(vec![], &mut module.funcs);
405
406 module.exports.add("_start", main);
407 Ok(BytecodeMetadata::new(
408 bytecode_ptr_local,
409 bytecode_len,
410 bytecode_data,
411 ))
412 }
413
414 /// Generate function exports.
415 fn generate_exports(
416 &self,
417 module: &mut Module,
418 identifiers: &Identifiers,
419 bc_metadata: &BytecodeMetadata,
420 ) -> Result<()> {
421 if !self.function_exports.is_empty() {
422 let fn_name_ptr_local = module.locals.add(ValType::I32);
423 for export in &self.function_exports {
424 // For each JS function export, add an export that copies the name of the function into memory and invokes it.
425 let js_export_bytes = export.js.as_bytes();
426 let js_export_len: i32 = js_export_bytes.len().try_into().unwrap();
427 let fn_name_data = module.data.add(DataKind::Passive, js_export_bytes.to_vec());
428
429 let mut export_fn = FunctionBuilder::new(&mut module.types, &[], &[]);
430 export_fn
431 .func_body()
432 // Copy bytecode.
433 .i32_const(0) // orig ptr
434 .i32_const(0) // orig len
435 .i32_const(1) // alignment
436 .i32_const(bc_metadata.len) // size to copy
437 .call(identifiers.canonical_abi_realloc)
438 .local_tee(bc_metadata.ptr)
439 .i32_const(0) // offset into data segment
440 .i32_const(bc_metadata.len) // size to copy
441 .memory_init(identifiers.memory, bc_metadata.data_section) // copy bytecode into allocated memory
442 .data_drop(bc_metadata.data_section)
443 // Copy function name.
444 .i32_const(0) // orig ptr
445 .i32_const(0) // orig len
446 .i32_const(1) // alignment
447 .i32_const(js_export_len) // new size
448 .call(identifiers.canonical_abi_realloc)
449 .local_tee(fn_name_ptr_local)
450 .i32_const(0) // offset into data segment
451 .i32_const(js_export_len) // size to copy
452 .memory_init(identifiers.memory, fn_name_data) // copy fn name into allocated memory
453 .data_drop(fn_name_data)
454 // Call invoke.
455 .local_get(bc_metadata.ptr)
456 .i32_const(bc_metadata.len)
457 .local_get(fn_name_ptr_local)
458 .i32_const(js_export_len)
459 .call(identifiers.invoke);
460 let export_fn = export_fn.finish(vec![], &mut module.funcs);
461 module.exports.add(&export.wit, export_fn);
462 }
463 }
464 Ok(())
465 }
466
467 /// Clean-up the generated Wasm.
468 fn postprocess(&self, module: &mut Module) -> Result<Vec<u8>> {
469 match self.linking {
470 LinkingKind::Static => {
471 // Remove no longer necessary exports.
472 module.exports.remove("canonical_abi_realloc")?;
473
474 // Only internal plugins expose eval_bytecode function.
475 if matches!(
476 self.plugin_kind,
477 plugin::PluginKind::Default | plugin::PluginKind::V2
478 ) {
479 module.exports.remove("eval_bytecode")?;
480 }
481
482 module.exports.remove("invoke")?;
483 module.exports.remove("compile_src")?;
484
485 // Run wasm-opt to optimize.
486 let tempdir = tempfile::tempdir()?;
487 let tempfile_path = tempdir.path().join("temp.wasm");
488
489 module.emit_wasm_file(&tempfile_path)?;
490
491 OptimizationOptions::new_opt_level_3() // Aggressively optimize for speed.
492 .shrink_level(ShrinkLevel::Level0) // Don't optimize for size at the expense of performance.
493 .debug_info(false)
494 .run(&tempfile_path, &tempfile_path)?;
495
496 Ok(fs::read(&tempfile_path)?)
497 }
498 LinkingKind::Dynamic => Ok(module.emit_wasm()),
499 }
500 }
501
502 // For the example generated WAT, the `bytecode_len` is 137
503 // (module
504 // (type (;0;) (func))
505 // (type (;1;) (func (param i32 i32)))
506 // (type (;2;) (func (param i32 i32 i32 i32)))
507 // (type (;3;) (func (param i32 i32 i32 i32) (result i32)))
508 // (import "javy_quickjs_provider_v2" "canonical_abi_realloc" (func (;0;) (type 3)))
509 // (import "javy_quickjs_provider_v2" "eval_bytecode" (func (;1;) (type 1)))
510 // (import "javy_quickjs_provider_v2" "memory" (memory (;0;) 0))
511 // (import "javy_quickjs_provider_v2" "invoke" (func (;2;) (type 2)))
512 // (func (;3;) (type 0)
513 // (local i32 i32)
514 // i32.const 0
515 // i32.const 0
516 // i32.const 1
517 // i32.const 137
518 // call 0
519 // local.tee 0
520 // i32.const 0
521 // i32.const 137
522 // memory.init 0
523 // data.drop 0
524 // i32.const 0
525 // i32.const 0
526 // i32.const 1
527 // i32.const 3
528 // call 0
529 // local.tee 1
530 // i32.const 0
531 // i32.const 3
532 // memory.init 1
533 // data.drop 1
534 // local.get 0
535 // i32.const 137
536 // local.get 1
537 // i32.const 3
538 // call 2
539 // )
540 // (func (;4;) (type 0)
541 // (local i32)
542 // i32.const 0
543 // i32.const 0
544 // i32.const 1
545 // i32.const 137
546 // call 0
547 // local.tee 0
548 // i32.const 0
549 // i32.const 137
550 // memory.init 0
551 // local.get 0
552 // i32.const 137
553 // call 1
554 // )
555 // (export "_start" (func 4))
556 // (export "foo" (func 3))
557 // (data (;0;) "\02\05\18function.mjs\06foo\0econsole\06log\06bar\0f\bc\03\00\01\00\00\be\03\00\00\0e\00\06\01\a0\01\00\00\00\03\01\01\1a\00\be\03\00\01\08\ea\05\c0\00\e1)8\e0\00\00\00B\e1\00\00\00\04\e2\00\00\00$\01\00)\bc\03\01\04\01\00\07\0a\0eC\06\01\be\03\00\00\00\03\00\00\13\008\e0\00\00\00B\e1\00\00\00\04\df\00\00\00$\01\00)\bc\03\01\02\03]")
558 // (data (;1;) "foo")
559 // )
560 /// Generate a Wasm module which will run the provided JS source code.
561 pub fn generate(&mut self, js: &js::JS) -> Result<Vec<u8>> {
562 if self.wit_opts.defined() {
563 self.function_exports = exports::process_exports(
564 js,
565 self.wit_opts.unwrap_path(),
566 self.wit_opts.unwrap_world(),
567 )?;
568 }
569
570 let mut module = self.generate_initial_module()?;
571 let identifiers = self.resolve_identifiers(&mut module)?;
572 let bc_metadata = self.generate_main(&mut module, js, &identifiers)?;
573 self.generate_exports(&mut module, &identifiers, &bc_metadata)?;
574
575 transform::add_producers_section(&mut module.producers);
576 if !self.source_compression {
577 module.customs.add(SourceCodeSection::uncompressed(js)?);
578 } else {
579 module.customs.add(SourceCodeSection::compressed(js)?);
580 }
581
582 let wasm = self.postprocess(&mut module)?;
583 Ok(wasm)
584 }
585}