javy_codegen/lib.rs
1//! WebAssembly Code Generation for JavaScript
2//!
3//! This module provides functionality to emit Wasm modules which will run
4//! JavaScript source code with the QuickJS interpreter.
5//!
6//! Javy supports two main code generation paths:
7//!
8//! 1. Static code generation
9//! 2. Dynamic code generation
10//!
11//! ## Static code generation
12//!
13//! A single unit of code is generated, which is a Wasm module consisting of the
14//! bytecode representation of a given JavaScript program and the code for
15//! a particular version of the QuickJS engine compiled to Wasm.
16//!
17//! The generated Wasm module is self contained and the bytecode version matches
//! the exact requirements of the embedded QuickJS engine.
19//!
20//! ## Dynamic code generation
21//!
22//! A single unit of code is generated, which is a Wasm module consisting of the
23//! bytecode representation of a given JavaScript program. The JavaScript
24//! bytecode is stored as part of the data section of the module which also
25//! contains instructions to execute that bytecode through dynamic linking
26//! at runtime.
27//!
28//! Dynamic code generation requires a plugin module to be used and linked
29//! against at runtime in order to execute the JavaScript bytecode. This
30//! operation involves carefully ensuring that a given plugin version matches
31//! the plugin version of the imports requested by the generated Wasm module
32//! as well as ensuring that any features available in the plugin match the
//! features requested by the JavaScript bytecode.
34//!
35//! ## Examples
36//!
37//! Simple Wasm module generation:
38//!
39//! ```no_run
40//! use std::path::Path;
41//! use javy_codegen::{Generator, LinkingKind, Plugin, JS};
42//!
43//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Load your target JavaScript.
45//! let js = JS::from_file(Path::new("example.js"))?;
46//!
47//! // Load existing pre-initialized Javy plugin.
48//! let plugin = Plugin::new_from_path(Path::new("example-plugin.wasm"))?;
49//!
50//! // Configure code generator.
51//! let mut generator = Generator::new(plugin);
52//! generator.linking(LinkingKind::Static);
53//!
54//! // Generate your Wasm module.
//!     let wasm = generator.generate(&js)?;
56//!
57//! Ok(())
58//! }
59//! ```
60//!
61//! ## Core concepts
62//! * [`Generator`] - The main entry point for generating Wasm modules.
63//! * [`Plugin`] - An initialized Javy plugin.
64//! * [`JS`] - JavaScript source code.
65//!
66//! ## Features
67//!
68//! * `plugin_internal` - Enables additional code generation options for
//!   internal use. Please note that this flag enables an unstable feature. The
//!   unstable APIs exposed by this feature may break in the future without
//!   notice.
72
73use std::{fs, rc::Rc, sync::OnceLock};
74
75pub(crate) mod bytecode;
76pub(crate) mod exports;
77pub(crate) mod transform;
78
79pub(crate) mod js;
80pub(crate) mod plugin;
81pub(crate) mod wit;
82
83pub use crate::js::JS;
84pub use crate::plugin::Plugin;
85pub use crate::wit::WitOptions;
86use crate::{exports::Exports, plugin::PluginKind};
87
88use transform::SourceCodeSection;
89use walrus::{
90 DataId, DataKind, ExportItem, FunctionBuilder, FunctionId, LocalId, MemoryId, Module, ValType,
91};
92use wasm_opt::{OptimizationOptions, ShrinkLevel};
93use wasmtime_wasi::{pipe::MemoryInputPipe, WasiCtxBuilder};
94use wizer::{Linker, Wizer};
95
96use anyhow::Result;
97
/// Process-wide stdin pipe holding the JS runtime config JSON.
///
/// It is filled in by `Generator::generate_initial_module` when statically
/// linking, and cloned into the WASI context built for the Wizer linker so
/// that the plugin's `initialize-runtime` function can read the config from
/// stdin. Being a [`OnceLock`], it can only be initialized once per process;
/// any later attempt to set it fails.
static STDIN_PIPE: OnceLock<MemoryInputPipe> = OnceLock::new();
99
/// The kind of linking to use.
///
/// Selects which of the two code generation paths a [`Generator`] follows.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum LinkingKind {
    /// Static linking: the generated module embeds the plugin together with
    /// the JavaScript bytecode. This is the default.
    #[default]
    Static,
    /// Dynamic linking: the generated module only carries the JavaScript
    /// bytecode and imports the functions it needs from a plugin module.
    Dynamic,
}
109
/// Source code embedding options for the generated Wasm module.
///
/// The default is [`SourceEmbedding::Uncompressed`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum SourceEmbedding {
    /// Embed the source code without compression. This is the default.
    #[default]
    Uncompressed,
    /// Embed the source code with compression.
    Compressed,
    /// Don't embed the source code.
    Omitted,
}
121
122/// Identifiers used by the generated module.
123// This is an internal detail of this module.
124#[derive(Debug)]
125pub(crate) struct Identifiers {
126 cabi_realloc: FunctionId,
127 eval_bytecode: Option<FunctionId>,
128 invoke: FunctionId,
129 memory: MemoryId,
130}
131
132impl Identifiers {
133 fn new(
134 cabi_realloc: FunctionId,
135 eval_bytecode: Option<FunctionId>,
136 invoke: FunctionId,
137 memory: MemoryId,
138 ) -> Self {
139 Self {
140 cabi_realloc,
141 eval_bytecode,
142 invoke,
143 memory,
144 }
145 }
146}
147
148/// Helper struct to keep track of bytecode metadata.
149// This is an internal detail of this module.
150#[derive(Debug)]
151pub(crate) struct BytecodeMetadata {
152 ptr: LocalId,
153 len: i32,
154 data_section: DataId,
155}
156
157impl BytecodeMetadata {
158 fn new(ptr: LocalId, len: i32, data_section: DataId) -> Self {
159 Self {
160 ptr,
161 len,
162 data_section,
163 }
164 }
165}
166
167/// Generator used to produce Wasm binaries from JS source code.
168#[derive(Debug, Default, Clone)]
169pub struct Generator {
170 /// Plugin to use.
171 pub(crate) plugin: Plugin,
172 /// What kind of linking to use when generating a module.
173 pub(crate) linking: LinkingKind,
174 /// Source code embedding option for the generated module.
175 pub(crate) source_embedding: SourceEmbedding,
176 /// WIT options for code generation.
177 pub(crate) wit_opts: WitOptions,
178 /// JavaScript function exports.
179 pub(crate) function_exports: Exports,
180 /// The kind of plugin a generator will link.
181 plugin_kind: PluginKind,
182 /// An optional JS runtime config provided as JSON bytes.
183 js_runtime_config: Vec<u8>,
184 /// The version string to include in the producers custom section.
185 producer_version: Option<String>,
186}
187
188impl Generator {
189 /// Create a new [`Generator`].
190 pub fn new(plugin: Plugin) -> Self {
191 Self {
192 plugin,
193 ..Self::default()
194 }
195 }
196
197 /// Set the kind of linking (default: [`LinkingKind::Static`])
198 pub fn linking(&mut self, linking: LinkingKind) -> &mut Self {
199 self.linking = linking;
200 self
201 }
202
203 /// Set the source embedding option (default: [`SourceEmbedding::Compressed`])
204 pub fn source_embedding(&mut self, source_embedding: SourceEmbedding) -> &mut Self {
205 self.source_embedding = source_embedding;
206 self
207 }
208
209 /// Set the wit options. (default: Empty [`WitOptions`])
210 pub fn wit_opts(&mut self, wit_opts: wit::WitOptions) -> &mut Self {
211 self.wit_opts = wit_opts;
212 self
213 }
214
215 #[cfg(feature = "plugin_internal")]
216 /// Set true if linking with a V2 plugin module.
217 pub fn linking_v2_plugin(&mut self, value: bool) -> &mut Self {
218 self.plugin_kind = if value {
219 PluginKind::V2
220 } else {
221 PluginKind::User
222 };
223
224 self
225 }
226
227 #[cfg(feature = "plugin_internal")]
228 /// Set the JS runtime configuration options to pass to the module.
229 pub fn js_runtime_config(&mut self, js_runtime_config: Vec<u8>) -> &mut Self {
230 self.js_runtime_config = js_runtime_config;
231 self
232 }
233
234 /// Sets the version string to use in the producers custom section.
235 pub fn producer_version(&mut self, producer_version: String) -> &mut Self {
236 self.producer_version = Some(producer_version);
237 self
238 }
239}
240
241impl Generator {
242 /// Generate the starting module.
243 fn generate_initial_module(&self) -> Result<Module> {
244 let config = transform::module_config();
245 let module = match &self.linking {
246 LinkingKind::Static => {
247 // Copy config JSON into stdin for `initialize-runtime` function.
248 STDIN_PIPE
249 .set(MemoryInputPipe::new(self.js_runtime_config.clone()))
250 .unwrap();
251 let wasm = Wizer::new()
252 .init_func("initialize-runtime")
253 .make_linker(Some(Rc::new(move |engine| {
254 let mut linker = Linker::new(engine);
255 wasmtime_wasi::preview1::add_to_linker_sync(&mut linker, move |cx| {
256 if cx.wasi_ctx.is_none() {
257 // The underlying buffer backing the pipe is an Arc
258 // so the cloning should be fast.
259 let config = STDIN_PIPE.get().unwrap().clone();
260 cx.wasi_ctx = Some(
261 WasiCtxBuilder::new()
262 .stdin(config)
263 .inherit_stdout()
264 .inherit_stderr()
265 .build_p1(),
266 );
267 }
268 cx.wasi_ctx.as_mut().unwrap()
269 })?;
270 Ok(linker)
271 })))?
272 .wasm_bulk_memory(true)
273 .run(self.plugin.as_bytes())?;
274 config.parse(&wasm)?
275 }
276 LinkingKind::Dynamic => Module::with_config(config),
277 };
278 Ok(module)
279 }
280
281 /// Resolve identifiers for functions and memory.
282 pub(crate) fn resolve_identifiers(&self, module: &mut Module) -> Result<Identifiers> {
283 match self.linking {
284 LinkingKind::Static => {
285 let cabi_realloc = module
286 .exports
287 .get_func(self.plugin_kind.realloc_fn_name())?;
288 let invoke = module.exports.get_func("invoke")?;
289 let ExportItem::Memory(memory) = module
290 .exports
291 .iter()
292 .find(|e| e.name == "memory")
293 .ok_or_else(|| anyhow::anyhow!("Missing memory export"))?
294 .item
295 else {
296 anyhow::bail!("Export with name memory must be of type memory")
297 };
298 Ok(Identifiers::new(cabi_realloc, None, invoke, memory))
299 }
300 LinkingKind::Dynamic => {
301 // All code by default is assumed to be linking against a default
302 // or a user provided plugin. However V2 plugins require a different
303 // import namespace to be used instead so we use the plugin_kind to
304 // to determine the import_namespace.
305 let import_namespace = self.plugin_kind.import_namespace(&self.plugin)?;
306
307 let cabi_realloc_type = module.types.add(
308 &[ValType::I32, ValType::I32, ValType::I32, ValType::I32],
309 &[ValType::I32],
310 );
311 let (cabi_realloc_fn_id, _) = module.add_import_func(
312 &import_namespace,
313 self.plugin_kind.realloc_fn_name(),
314 cabi_realloc_type,
315 );
316
317 // User plugins can use `invoke` with a null function name.
318 // Non-v2 plugins also won't have an `eval_bytecode` function to
319 // import.
320 let eval_bytecode_fn_id = if self.plugin_kind == PluginKind::V2 {
321 let eval_bytecode_type = module.types.add(&[ValType::I32, ValType::I32], &[]);
322 let (eval_bytecode_fn_id, _) = module.add_import_func(
323 &import_namespace,
324 "eval_bytecode",
325 eval_bytecode_type,
326 );
327 Some(eval_bytecode_fn_id)
328 } else {
329 None
330 };
331
332 let invoke_params = if self.plugin_kind == PluginKind::V2 {
333 [ValType::I32, ValType::I32, ValType::I32, ValType::I32].as_slice()
334 } else {
335 [
336 ValType::I32,
337 ValType::I32,
338 ValType::I32,
339 ValType::I32,
340 ValType::I32,
341 ]
342 .as_slice()
343 };
344 let invoke_type = module.types.add(invoke_params, &[]);
345 let (invoke_fn_id, _) =
346 module.add_import_func(&import_namespace, "invoke", invoke_type);
347
348 let (memory_id, _) = module.add_import_memory(
349 &import_namespace,
350 "memory",
351 false,
352 false,
353 0,
354 None,
355 None,
356 );
357
358 Ok(Identifiers::new(
359 cabi_realloc_fn_id,
360 eval_bytecode_fn_id,
361 invoke_fn_id,
362 memory_id,
363 ))
364 }
365 }
366 }
367
368 /// Generate the main function.
369 fn generate_main(
370 &self,
371 module: &mut Module,
372 js: &js::JS,
373 imports: &Identifiers,
374 ) -> Result<BytecodeMetadata> {
375 let bytecode = bytecode::compile_source(&self.plugin, self.plugin_kind, js.as_bytes())?;
376 let bytecode_len: i32 = bytecode.len().try_into()?;
377 let bytecode_data = module.data.add(DataKind::Passive, bytecode);
378
379 let mut main = FunctionBuilder::new(&mut module.types, &[], &[]);
380 let bytecode_ptr_local = module.locals.add(ValType::I32);
381 let mut instructions = main.func_body();
382 instructions
383 // Allocate memory in plugin instance for bytecode array.
384 .i32_const(0) // orig ptr
385 .i32_const(0) // orig size
386 .i32_const(1) // alignment
387 .i32_const(bytecode_len) // new size
388 .call(imports.cabi_realloc)
389 // Copy bytecode array into allocated memory.
390 .local_tee(bytecode_ptr_local) // save returned address to local and set as dest addr for mem.init
391 .i32_const(0) // offset into data segment for mem.init
392 .i32_const(bytecode_len) // size to copy from data segment
393 // top-2: dest addr, top-1: offset into source, top-0: size of memory region in bytes.
394 .memory_init(imports.memory, bytecode_data);
395 // Evaluate top level scope.
396 if let Some(eval_bytecode) = imports.eval_bytecode {
397 instructions
398 .local_get(bytecode_ptr_local) // ptr to bytecode
399 .i32_const(bytecode_len)
400 .call(eval_bytecode);
401 } else {
402 // Assert we're not emitting a call with a null function to
403 // invoke for the v2 plugin. `javy_quickjs_provider_v2` will never
404 // support calling `invoke` with a null function. The default
405 // plugin and user plugins do accept null functions.
406 assert!(
407 self.plugin_kind != PluginKind::V2,
408 "Using invoke with null function not supported for v2 plugin"
409 );
410 instructions
411 .local_get(bytecode_ptr_local) // ptr to bytecode
412 .i32_const(bytecode_len)
413 .i32_const(0) // set option discriminator to none
414 .i32_const(0) // set function name ptr to null
415 .i32_const(0) // set function name len to 0
416 .call(imports.invoke);
417 }
418 let main = main.finish(vec![], &mut module.funcs);
419
420 module.exports.add("_start", main);
421 Ok(BytecodeMetadata::new(
422 bytecode_ptr_local,
423 bytecode_len,
424 bytecode_data,
425 ))
426 }
427
428 /// Generate function exports.
429 fn generate_exports(
430 &self,
431 module: &mut Module,
432 identifiers: &Identifiers,
433 bc_metadata: &BytecodeMetadata,
434 ) -> Result<()> {
435 if !self.function_exports.is_empty() {
436 let fn_name_ptr_local = module.locals.add(ValType::I32);
437 for export in &self.function_exports {
438 // For each JS function export, add an export that copies the name of the function into memory and invokes it.
439 let js_export_bytes = export.js.as_bytes();
440 let js_export_len: i32 = js_export_bytes.len().try_into().unwrap();
441 let fn_name_data = module.data.add(DataKind::Passive, js_export_bytes.to_vec());
442
443 let mut export_fn = FunctionBuilder::new(&mut module.types, &[], &[]);
444 export_fn
445 .func_body()
446 // Copy bytecode.
447 .i32_const(0) // orig ptr
448 .i32_const(0) // orig len
449 .i32_const(1) // alignment
450 .i32_const(bc_metadata.len) // size to copy
451 .call(identifiers.cabi_realloc)
452 .local_tee(bc_metadata.ptr)
453 .i32_const(0) // offset into data segment
454 .i32_const(bc_metadata.len) // size to copy
455 .memory_init(identifiers.memory, bc_metadata.data_section) // copy bytecode into allocated memory
456 .data_drop(bc_metadata.data_section)
457 // Copy function name.
458 .i32_const(0) // orig ptr
459 .i32_const(0) // orig len
460 .i32_const(1) // alignment
461 .i32_const(js_export_len) // new size
462 .call(identifiers.cabi_realloc)
463 .local_tee(fn_name_ptr_local)
464 .i32_const(0) // offset into data segment
465 .i32_const(js_export_len) // size to copy
466 .memory_init(identifiers.memory, fn_name_data) // copy fn name into allocated memory
467 .data_drop(fn_name_data)
468 // Call invoke.
469 .local_get(bc_metadata.ptr)
470 .i32_const(bc_metadata.len);
471
472 if self.plugin_kind != PluginKind::V2 {
473 export_fn.func_body().i32_const(1); // set function name option discriminator to some
474 }
475
476 export_fn
477 .func_body()
478 .local_get(fn_name_ptr_local)
479 .i32_const(js_export_len)
480 .call(identifiers.invoke);
481 let export_fn = export_fn.finish(vec![], &mut module.funcs);
482 module.exports.add(&export.wit, export_fn);
483 }
484 }
485 Ok(())
486 }
487
488 /// Clean-up the generated Wasm.
489 fn postprocess(&self, module: &mut Module) -> Result<Vec<u8>> {
490 match self.linking {
491 LinkingKind::Static => {
492 // Remove no longer necessary exports.
493 module.exports.remove(self.plugin_kind.realloc_fn_name())?;
494
495 // Only v2 plugin exposes eval_bytecode function.
496 if self.plugin_kind == PluginKind::V2 {
497 module.exports.remove("eval_bytecode")?;
498 }
499
500 module.exports.remove("invoke")?;
501 module.exports.remove(self.plugin_kind.compile_fn_name())?;
502
503 // Run wasm-opt to optimize.
504 let tempdir = tempfile::tempdir()?;
505 let tempfile_path = tempdir.path().join("temp.wasm");
506
507 module.emit_wasm_file(&tempfile_path)?;
508
509 OptimizationOptions::new_opt_level_3() // Aggressively optimize for speed.
510 .shrink_level(ShrinkLevel::Level0) // Don't optimize for size at the expense of performance.
511 .debug_info(false)
512 .run(&tempfile_path, &tempfile_path)?;
513
514 Ok(fs::read(&tempfile_path)?)
515 }
516 LinkingKind::Dynamic => Ok(module.emit_wasm()),
517 }
518 }
519
520 /// Generate a Wasm module which will run the provided JS source code.
521 pub fn generate(&mut self, js: &js::JS) -> Result<Vec<u8>> {
522 if self.wit_opts.defined() {
523 self.function_exports = exports::process_exports(
524 js,
525 self.wit_opts.unwrap_path(),
526 self.wit_opts.unwrap_world(),
527 )?;
528 }
529
530 let mut module = self.generate_initial_module()?;
531 let identifiers = self.resolve_identifiers(&mut module)?;
532 let bc_metadata = self.generate_main(&mut module, js, &identifiers)?;
533 self.generate_exports(&mut module, &identifiers, &bc_metadata)?;
534
535 transform::add_producers_section(
536 &mut module.producers,
537 self.producer_version
538 .as_deref()
539 .unwrap_or(env!("CARGO_PKG_VERSION")),
540 );
541 match self.source_embedding {
542 SourceEmbedding::Omitted => {}
543 SourceEmbedding::Uncompressed => {
544 module.customs.add(SourceCodeSection::uncompressed(js)?);
545 }
546 SourceEmbedding::Compressed => {
547 module.customs.add(SourceCodeSection::compressed(js)?);
548 }
549 }
550
551 let wasm = self.postprocess(&mut module)?;
552 Ok(wasm)
553 }
554}