typst_library/foundations/
plugin.rs

1use std::fmt::{self, Debug, Formatter};
2use std::hash::{Hash, Hasher};
3use std::sync::{Arc, Mutex};
4
5use ecow::{EcoString, eco_format};
6use typst_syntax::Spanned;
7use wasmi::Memory;
8
9use crate::diag::{At, SourceResult, StrResult, bail};
10use crate::engine::Engine;
11use crate::foundations::{Binding, Bytes, Func, Module, Scope, Value, cast, func, scope};
12use crate::loading::{DataSource, Load};
13
14/// Loads a WebAssembly module.
15///
16/// The resulting [module] will contain one Typst [function] for each function
17/// export of the loaded WebAssembly module.
18///
19/// Typst WebAssembly plugins need to follow a specific
20/// [protocol]($plugin/#protocol). To run as a plugin, a program needs to be
21/// compiled to a 32-bit shared WebAssembly library. Plugin functions may accept
22/// multiple [byte buffers]($bytes) as arguments and return a single byte
23/// buffer. They should typically be wrapped in idiomatic Typst functions that
24/// perform the necessary conversions between native Typst types and bytes by
25/// leveraging [`str`]($str/#constructor), [`bytes`]($bytes/#constructor), and
26/// [data loading functions]($reference/data-loading).
27///
28/// For security reasons, plugins run in isolation from your system. This means
29/// that printing, reading files, or similar things are not supported.
30///
31/// # Example
32/// ```example
33/// #let myplugin = plugin("hello.wasm")
34/// #let concat(a, b) = str(
35///   myplugin.concatenate(
36///     bytes(a),
37///     bytes(b),
38///   )
39/// )
40///
41/// #concat("hello", "world")
42/// ```
43///
44/// Since the plugin function returns a module, it can be used with import
45/// syntax:
46/// ```typ
47/// #import plugin("hello.wasm"): concatenate
48/// ```
49///
50/// # Purity
51/// Plugin functions **must be pure:** A plugin function call must not have any
52/// observable side effects on future plugin calls and given the same arguments,
53/// it must always return the same value.
54///
55/// The reason for this is that Typst functions must be pure (which is quite
/// fundamental to the language design) and, since Typst functions can call
57/// plugin functions, this requirement is inherited. In particular, if a plugin
58/// function is called twice with the same arguments, Typst might cache the
59/// results and call your function only once. Moreover, Typst may run multiple
60/// instances of your plugin in multiple threads, with no state shared between
61/// them.
62///
63/// Typst does not enforce plugin function purity (for efficiency reasons), but
64/// calling an impure function will lead to unpredictable and irreproducible
65/// results and must be avoided.
66///
67/// That said, mutable operations _can be_ useful for plugins that require
68/// costly runtime initialization. Due to the purity requirement, such
69/// initialization cannot be performed through a normal function call. Instead,
70/// Typst exposes a [plugin transition API]($plugin.transition), which executes
71/// a function call and then creates a derived module with new functions which
72/// will observe the side effects produced by the transition call. The original
73/// plugin remains unaffected.
74///
75/// # Plugins and Packages
76/// Any Typst code can make use of a plugin simply by including a WebAssembly
77/// file and loading it. However, because the byte-based plugin interface is
78/// quite low-level, plugins are typically exposed through a package containing
79/// the plugin and idiomatic wrapper functions.
80///
81/// # WASI
82/// Many compilers will use the [WASI ABI](https://wasi.dev/) by default or as
83/// their only option (e.g. emscripten), which allows printing, reading files,
84/// etc. This ABI will not directly work with Typst. You will either need to
85/// compile to a different target or [stub all
86/// functions](https://github.com/astrale-sharp/wasm-minimal-protocol/tree/master/crates/wasi-stub).
87///
88/// # Protocol
89/// To be used as a plugin, a WebAssembly module must conform to the following
90/// protocol:
91///
92/// ## Exports
93/// A plugin module can export functions to make them callable from Typst. To
94/// conform to the protocol, an exported function should:
95///
96/// - Take `n` 32-bit integer arguments `a_1`, `a_2`, ..., `a_n` (interpreted as
97///   lengths, so `usize/size_t` may be preferable), and return one 32-bit
98///   integer.
99///
100/// - The function should first allocate a buffer `buf` of length `a_1 + a_2 +
101///   ... + a_n`, and then call
102///   `wasm_minimal_protocol_write_args_to_buffer(buf.ptr)`.
103///
/// - The first `a_1` bytes of the buffer now constitute the first argument, the
///   next `a_2` bytes the second argument, and so on.
106///
107/// - The function can now do its job with the arguments and produce an output
108///   buffer. Before returning, it should call
109///   `wasm_minimal_protocol_send_result_to_host` to send its result back to the
110///   host.
111///
112/// - To signal success, the function should return `0`.
113///
114/// - To signal an error, the function should return `1`. The written buffer is
///   then interpreted as a UTF-8 encoded error message.
116///
117/// ## Imports
118/// Plugin modules need to import two functions that are provided by the
119/// runtime. (Types and functions are described using WAT syntax.)
120///
121/// - `(import "typst_env" "wasm_minimal_protocol_write_args_to_buffer" (func
122///   (param i32)))`
123///
124///   Writes the arguments for the current function into a plugin-allocated
125///   buffer. When a plugin function is called, it [receives the
126///   lengths](#exports) of its input buffers as arguments. It should then
127///   allocate a buffer whose capacity is at least the sum of these lengths. It
128///   should then call this function with a `ptr` to the buffer to fill it with
129///   the arguments, one after another.
130///
131/// - `(import "typst_env" "wasm_minimal_protocol_send_result_to_host" (func
132///   (param i32 i32)))`
133///
134///   Sends the output of the current function to the host (Typst). The first
135///   parameter shall be a pointer to a buffer (`ptr`), while the second is the
136///   length of that buffer (`len`). The memory pointed at by `ptr` can be freed
137///   immediately after this function returns. If the message should be
138///   interpreted as an error message, it should be encoded as UTF-8.
139///
140/// # Resources
141/// For more resources, check out the [wasm-minimal-protocol
142/// repository](https://github.com/astrale-sharp/wasm-minimal-protocol). It
143/// contains:
144///
145/// - A list of example plugin implementations and a test runner for these
146///   examples
147/// - Wrappers to help you write your plugin in Rust (Zig wrapper in
148///   development)
149/// - A stubber for WASI
150#[func(scope)]
151pub fn plugin(
152    engine: &mut Engine,
153    /// A [path]($syntax/#paths) to a WebAssembly file or raw WebAssembly bytes.
154    source: Spanned<DataSource>,
155) -> SourceResult<Module> {
156    let loaded = source.load(engine.world)?;
157    Plugin::module(loaded.data).at(source.span)
158}
159
#[scope]
impl plugin {
    /// Calls a plugin function that has side effects and returns a new module
    /// with plugin functions that are guaranteed to have observed the results
    /// of the mutable call.
    ///
    /// Note that calling an impure function through a normal function call
    /// (without use of the transition API) is forbidden and leads to
    /// unpredictable behaviour. Read the [section on purity]($plugin/#purity)
    /// for more details.
    ///
    /// In the example below, we load the plugin `hello-mut.wasm` which exports
    /// two functions: The `get()` function retrieves a global array as a
    /// string. The `add(value)` function adds a value to the global array.
    ///
    /// We call `add` via the transition API. The call `mutated.get()` on the
    /// derived module will observe the addition. Meanwhile the original module
    /// remains untouched as demonstrated by the `base.get()` call.
    ///
    /// _Note:_ Due to limitations in the internal WebAssembly implementation,
    /// the transition API can only guarantee to reflect changes in the plugin's
    /// memory, not in WebAssembly globals. If your plugin relies on changes to
    /// globals being visible after transition, you might want to avoid use of
    /// the transition API for now. We hope to lift this limitation in the
    /// future.
    ///
    /// ```typ
    /// #let base = plugin("hello-mut.wasm")
    /// #assert.eq(base.get(), "[]")
    ///
    /// #let mutated = plugin.transition(base.add, "hello")
    /// #assert.eq(base.get(), "[]")
    /// #assert.eq(mutated.get(), "[hello]")
    /// ```
    #[func]
    pub fn transition(
        /// The plugin function to call.
        func: PluginFunc,
        /// The byte buffers to call the function with.
        #[variadic]
        arguments: Vec<Bytes>,
    ) -> StrResult<Module> {
        // Thin wrapper: `PluginFunc::transition` performs the call on the
        // function's plugin and turns the derived plugin into a module.
        func.transition(arguments)
    }
}
205
/// A function loaded from a WebAssembly plugin.
///
/// Pairs the name of one function with a shared handle to the plugin it
/// belongs to. The derived equality and hash go through the `Arc` to
/// `Plugin`'s own `PartialEq`/`Hash` impls and additionally include the name.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct PluginFunc {
    /// The underlying plugin, shared by this and the other functions.
    plugin: Arc<Plugin>,
    /// The name of the plugin function.
    name: EcoString,
}
214
215impl PluginFunc {
216    /// The name of the plugin function.
217    pub fn name(&self) -> &EcoString {
218        &self.name
219    }
220
221    /// Call the WebAssembly function with the given arguments.
222    #[comemo::memoize]
223    #[typst_macros::time(name = "call plugin")]
224    pub fn call(&self, args: Vec<Bytes>) -> StrResult<Bytes> {
225        self.plugin.call(&self.name, args)
226    }
227
228    /// Transition a plugin and turn the result into a module.
229    #[comemo::memoize]
230    #[typst_macros::time(name = "transition plugin")]
231    pub fn transition(&self, args: Vec<Bytes>) -> StrResult<Module> {
232        self.plugin.transition(&self.name, args).map(Plugin::into_module)
233    }
234}
235
// Conversions between `PluginFunc` and Typst values.
//
// - Into a value: the plugin function is converted into a `Func` and wrapped
//   in `Value::Func`.
// - From a value: a `Func` is accepted if `to_plugin()` yields a plugin
//   function (which is then cloned); otherwise the cast fails with
//   "expected plugin function".
cast! {
    PluginFunc,
    self => Value::Func(self.into()),
    v: Func => v.to_plugin().ok_or("expected plugin function")?.clone(),
}
241
/// A plugin with potentially multiple instances for multi-threaded
/// execution.
///
/// A plugin is either freshly loaded (`Plugin::new`: no snapshot, fingerprint
/// `0`) or derived from another plugin by `Plugin::transition`, in which case
/// it carries the snapshot taken after the transition call and a fingerprint
/// derived from the parent's fingerprint, the function name, and the arguments.
struct Plugin {
    /// Shared by all variants of the plugin.
    base: Arc<PluginBase>,
    /// A pool of plugin instances.
    ///
    /// When multiple plugin calls run concurrently due to multi-threading, we
    /// create new instances whenever we run out of idle ones. Instances whose
    /// call failed are not returned to the pool.
    pool: Mutex<Vec<PluginInstance>>,
    /// A snapshot that new instances should be restored to.
    ///
    /// `None` for a freshly loaded plugin.
    snapshot: Option<Snapshot>,
    /// A combined hash that incorporates all function names and arguments used
    /// in transitions of this plugin, such that this plugin has a deterministic
    /// hash and equality check that can differentiate it from "siblings" (same
    /// base, different transitions).
    fingerprint: u128,
}
260
261impl Plugin {
262    /// Create a plugin and turn it into a module.
263    #[comemo::memoize]
264    #[typst_macros::time(name = "load plugin")]
265    fn module(bytes: Bytes) -> StrResult<Module> {
266        Self::new(bytes).map(Self::into_module)
267    }
268
269    /// Create a new plugin from raw WebAssembly bytes.
270    fn new(bytes: Bytes) -> StrResult<Self> {
271        let mut config = wasmi::Config::default();
272
273        // Disable relaxed SIMD as it can introduce non-determinism.
274        config.wasm_relaxed_simd(false);
275
276        let engine = wasmi::Engine::new(&config);
277        let module = wasmi::Module::new(&engine, bytes.as_slice())
278            .map_err(|err| format!("failed to load WebAssembly module ({err})"))?;
279
280        // Ensure that the plugin exports its memory.
281        if !matches!(module.get_export("memory"), Some(wasmi::ExternType::Memory(_))) {
282            bail!("plugin does not export its memory");
283        }
284
285        let mut linker = wasmi::Linker::new(&engine);
286        linker
287            .func_wrap(
288                "typst_env",
289                "wasm_minimal_protocol_send_result_to_host",
290                wasm_minimal_protocol_send_result_to_host,
291            )
292            .unwrap();
293        linker
294            .func_wrap(
295                "typst_env",
296                "wasm_minimal_protocol_write_args_to_buffer",
297                wasm_minimal_protocol_write_args_to_buffer,
298            )
299            .unwrap();
300
301        let base = Arc::new(PluginBase { bytes, linker, module });
302        let instance = PluginInstance::new(&base, None)?;
303
304        Ok(Self {
305            base,
306            snapshot: None,
307            fingerprint: 0,
308            pool: Mutex::new(vec![instance]),
309        })
310    }
311
312    /// Execute a function with access to an instsance.
313    fn call(&self, func: &str, args: Vec<Bytes>) -> StrResult<Bytes> {
314        // Acquire an instance from the pool (potentially creating a new one).
315        let mut instance = self.acquire()?;
316
317        // Execute the call on an instance from the pool. If the call fails, we
318        // return early and _don't_ return the instance to the pool as it might
319        // be irrecoverably damaged.
320        let output = instance.call(func, args)?;
321
322        // Return the instance to the pool.
323        self.pool.lock().unwrap().push(instance);
324
325        Ok(output)
326    }
327
328    /// Call a mutable plugin function, producing a new mutable whose functions
329    /// are guaranteed to be able to observe the mutation.
330    fn transition(&self, func: &str, args: Vec<Bytes>) -> StrResult<Plugin> {
331        // Derive a new transition hash from the old one and the function and arguments.
332        let fingerprint = typst_utils::hash128(&(self.fingerprint, func, &args));
333
334        // Execute the mutable call on an instance.
335        let mut instance = self.acquire()?;
336
337        // Call the function. If the call fails, we return early and _don't_
338        // return the instance to the pool as it might be irrecoverably damaged.
339        instance.call(func, args)?;
340
341        // Snapshot the instance after the mutable call.
342        let snapshot = instance.snapshot();
343
344        // Create a new plugin and move (this is important!) the used instance
345        // into it, so that the old plugin won't observe the mutation. Also
346        // save the snapshot so that instances that are initialized for the
347        // transitioned plugin's pool observe the mutation.
348        Ok(Self {
349            base: self.base.clone(),
350            snapshot: Some(snapshot),
351            fingerprint,
352            pool: Mutex::new(vec![instance]),
353        })
354    }
355
356    /// Acquire an instance from the pool (or create a new one).
357    fn acquire(&self) -> StrResult<PluginInstance> {
358        // Don't use match to ensure that the lock is released before we create
359        // a new instance.
360        if let Some(instance) = self.pool.lock().unwrap().pop() {
361            return Ok(instance);
362        }
363
364        PluginInstance::new(&self.base, self.snapshot.as_ref())
365    }
366
367    /// Turn a plugin into a Typst module containing plugin functions.
368    fn into_module(self) -> Module {
369        let shared = Arc::new(self);
370
371        // Build a scope from the collected functions.
372        let mut scope = Scope::new();
373        for export in shared.base.module.exports() {
374            if matches!(export.ty(), wasmi::ExternType::Func(_)) {
375                let name = EcoString::from(export.name());
376                let func = PluginFunc { plugin: shared.clone(), name: name.clone() };
377                scope.bind(name, Binding::detached(Func::from(func)));
378            }
379        }
380
381        Module::anonymous(scope)
382    }
383}
384
impl Debug for Plugin {
    /// Formats the plugin as the fixed placeholder `Plugin(..)`; none of its
    /// fields are printed.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // `pad` applies the formatter's width and alignment settings.
        f.pad("Plugin(..)")
    }
}
390
391impl PartialEq for Plugin {
392    fn eq(&self, other: &Self) -> bool {
393        self.base.bytes == other.base.bytes && self.fingerprint == other.fingerprint
394    }
395}
396
397impl Hash for Plugin {
398    fn hash<H: Hasher>(&self, state: &mut H) {
399        self.base.bytes.hash(state);
400        self.fingerprint.hash(state);
401    }
402}
403
/// Shared by all pooled & transitioned variants of the plugin.
///
/// Created once in `Plugin::new` and then only cloned by reference count when
/// a plugin is transitioned.
struct PluginBase {
    /// The raw WebAssembly bytes. Used for the plugin's equality and hash.
    bytes: Bytes,
    /// The compiled WebAssembly module.
    module: wasmi::Module,
    /// A linker used to create a `Store` for execution. It has the two
    /// `typst_env` host functions of the plugin protocol registered.
    linker: wasmi::Linker<CallData>,
}
413
/// A single plugin instance for single-threaded execution.
///
/// Calls require `&mut self`, so one instance serves one call at a time;
/// `Plugin` keeps a pool of these to serve concurrent calls.
struct PluginInstance {
    /// The underlying wasmi instance.
    instance: wasmi::Instance,
    /// The execution store of this concrete plugin instance. Its `CallData`
    /// carries arguments, output, and memory errors between host and plugin.
    store: wasmi::Store<CallData>,
}
421
/// A snapshot of a plugin instance.
///
/// Only the contents of the exported `memory` are captured; see
/// `PluginInstance::snapshot` and `PluginInstance::restore`.
struct Snapshot {
    /// The number of pages in the main memory at the time of the snapshot.
    mem_pages: u64,
    /// The data in the main memory (a full copy of its bytes).
    mem_data: Vec<u8>,
}
429
430impl PluginInstance {
431    /// Create a new execution instance of a plugin, potentially restoring
432    /// a snapshot.
433    #[typst_macros::time(name = "create plugin instance")]
434    fn new(base: &PluginBase, snapshot: Option<&Snapshot>) -> StrResult<PluginInstance> {
435        let mut store = wasmi::Store::new(base.linker.engine(), CallData::default());
436        let instance = base
437            .linker
438            .instantiate_and_start(&mut store, &base.module)
439            .map_err(|e| eco_format!("{e}"))?;
440
441        let mut instance = PluginInstance { instance, store };
442        if let Some(snapshot) = snapshot {
443            instance.restore(snapshot);
444        }
445        Ok(instance)
446    }
447
448    /// Call a plugin function with byte arguments.
449    fn call(&mut self, func: &str, args: Vec<Bytes>) -> StrResult<Bytes> {
450        let handle = self
451            .instance
452            .get_export(&self.store, func)
453            .unwrap()
454            .into_func()
455            .unwrap();
456        let ty = handle.ty(&self.store);
457
458        // Check function signature. Do this lazily only when a function is called
459        // because there might be exported functions like `_initialize` that don't
460        // match the schema.
461        if ty.params().iter().any(|&v| v != wasmi::core::ValType::I32) {
462            bail!(
463                "plugin function `{func}` has a parameter that is not a 32-bit integer"
464            );
465        }
466        if ty.results() != [wasmi::core::ValType::I32] {
467            bail!("plugin function `{func}` does not return exactly one 32-bit integer");
468        }
469
470        // Check inputs.
471        let expected = ty.params().len();
472        let given = args.len();
473        if expected != given {
474            bail!(
475                "plugin function takes {expected} argument{}, but {given} {} given",
476                if expected == 1 { "" } else { "s" },
477                if given == 1 { "was" } else { "were" },
478            );
479        }
480
481        // Collect the lengths of the argument buffers.
482        let lengths = args
483            .iter()
484            .map(|a| wasmi::Val::I32(a.len() as i32))
485            .collect::<Vec<_>>();
486
487        // Store the input data.
488        self.store.data_mut().args = args;
489
490        // Call the function.
491        let mut code = wasmi::Val::I32(-1);
492        handle
493            .call(&mut self.store, &lengths, std::slice::from_mut(&mut code))
494            .map_err(|err| eco_format!("plugin panicked: {err}"))?;
495
496        if let Some(MemoryError { offset, length, write }) =
497            self.store.data_mut().memory_error.take()
498        {
499            return Err(eco_format!(
500                "plugin tried to {kind} out of bounds: \
501                 pointer {offset:#x} is out of bounds for {kind} of length {length}",
502                kind = if write { "write" } else { "read" }
503            ));
504        }
505
506        // Extract the returned data.
507        let output = std::mem::take(&mut self.store.data_mut().output);
508
509        // Parse the functions return value.
510        match code {
511            wasmi::Val::I32(0) => {}
512            wasmi::Val::I32(1) => match std::str::from_utf8(&output) {
513                Ok(message) => bail!("plugin errored with: {message}"),
514                Err(_) => {
515                    bail!("plugin errored, but did not return a valid error message")
516                }
517            },
518            _ => bail!("plugin did not respect the protocol"),
519        };
520
521        Ok(Bytes::new(output))
522    }
523
524    /// Creates a snapshot of this instance from which another one can be
525    /// initialized.
526    #[typst_macros::time(name = "save snapshot")]
527    fn snapshot(&self) -> Snapshot {
528        let memory = self.memory();
529        let mem_pages = memory.size(&self.store);
530        let mem_data = memory.data(&self.store).to_vec();
531        Snapshot { mem_pages, mem_data }
532    }
533
534    /// Restores the instance to a snapshot.
535    #[typst_macros::time(name = "restore snapshot")]
536    fn restore(&mut self, snapshot: &Snapshot) {
537        let memory = self.memory();
538        let current_size = memory.size(&self.store);
539        if current_size < snapshot.mem_pages {
540            memory
541                .grow(&mut self.store, snapshot.mem_pages - current_size)
542                .unwrap();
543        }
544
545        memory.data_mut(&mut self.store)[..snapshot.mem_data.len()]
546            .copy_from_slice(&snapshot.mem_data);
547    }
548
549    /// Retrieves a handle to the plugin's main memory.
550    fn memory(&self) -> Memory {
551        self.instance
552            .get_export(&self.store, "memory")
553            .unwrap()
554            .into_memory()
555            .unwrap()
556    }
557}
558
/// The persistent store data used for communication between store and host.
///
/// Lives in each instance's `wasmi::Store` and is accessed by
/// `PluginInstance::call` and by the two host functions.
#[derive(Default)]
struct CallData {
    /// Arguments for a current call. Set before the plugin function is invoked
    /// and taken by `wasm_minimal_protocol_write_args_to_buffer`.
    args: Vec<Bytes>,
    /// The results of the current call. Written by
    /// `wasm_minimal_protocol_send_result_to_host` and taken after the call.
    output: Vec<u8>,
    /// A memory error that occurred during execution of the current call.
    /// Recorded by the host functions and checked once the call has returned.
    memory_error: Option<MemoryError>,
}
569
/// If there was an error reading/writing memory, keep the offset + length to
/// display an error message.
struct MemoryError {
    /// The offset in the plugin's memory at which the access was attempted.
    offset: u32,
    /// The number of bytes that were to be read or written.
    length: u32,
    /// `true` if the failed access was a write, `false` if it was a read.
    write: bool,
}
577
578/// Write the arguments to the plugin function into the plugin's memory.
579fn wasm_minimal_protocol_write_args_to_buffer(
580    mut caller: wasmi::Caller<CallData>,
581    ptr: u32,
582) {
583    let memory = caller.get_export("memory").unwrap().into_memory().unwrap();
584    let arguments = std::mem::take(&mut caller.data_mut().args);
585    let mut offset = ptr as usize;
586    for arg in arguments {
587        if memory.write(&mut caller, offset, arg.as_slice()).is_err() {
588            caller.data_mut().memory_error = Some(MemoryError {
589                offset: offset as u32,
590                length: arg.len() as u32,
591                write: true,
592            });
593            return;
594        }
595        offset += arg.len();
596    }
597}
598
599/// Extracts the output of the plugin function from the plugin's memory.
600fn wasm_minimal_protocol_send_result_to_host(
601    mut caller: wasmi::Caller<CallData>,
602    ptr: u32,
603    len: u32,
604) {
605    let memory = caller.get_export("memory").unwrap().into_memory().unwrap();
606    let mut buffer = std::mem::take(&mut caller.data_mut().output);
607    buffer.resize(len as usize, 0);
608    if memory.read(&caller, ptr as _, &mut buffer).is_err() {
609        caller.data_mut().memory_error =
610            Some(MemoryError { offset: ptr, length: len, write: false });
611        return;
612    }
613    caller.data_mut().output = buffer;
614}