Skip to main content

pdf_xfa/js_runtime/
mod.rs

1//! M3-B Phase B — JavaScript runtime adapter (skeleton).
2//!
3//! This module is the integration boundary between `crates/pdf-xfa`'s
4//! flatten pipeline and a sandboxed JavaScript runtime. Phase B ships
5//! the boundary plus a `NullRuntime` stub. The rquickjs-backed runtime
6//! is gated behind the `xfa-js-sandboxed` Cargo feature and registers
7//! **no host bindings** — Phase C adds the first useful set per
8//! `benchmarks/runs/M3B_HOST_BINDINGS_MINIMUM_SET.md`.
9//!
10//! The default behaviour of [`crate::dynamic::apply_dynamic_scripts`]
11//! and `flatten_xfa_to_pdf` is unchanged: with the feature off and
12//! mode `BestEffortStatic`, the runtime is never invoked. Adding the
13//! adapter is intentionally behaviour-neutral.
14//!
15//! See `benchmarks/runs/M3B_RUNTIME_SECURITY_MODEL.md` for the 18
16//! invariants the adapter must respect (S-1..S-18).
17
18pub mod host;
19pub mod null;
20
21#[cfg(feature = "xfa-js-sandboxed")]
22pub mod rquickjs_backend;
23
24pub use host::{
25    HostBindings, MutationLogEntry, MAX_INSTANCES_PER_SUBFORM, MAX_ITEMS_PER_LISTBOX,
26    MAX_MUTATIONS_PER_DOC, MAX_RESOLVE_CALLS_PER_SCRIPT, MAX_RESOLVE_RESULTS, MAX_SOM_DEPTH,
27};
28pub use null::NullRuntime;
29#[cfg(feature = "xfa-js-sandboxed")]
30pub use rquickjs_backend::QuickJsRuntime;
31
32use xfa_dom_resolver::data_dom::DataDom;
33use xfa_layout_engine::form::{FormNodeId, FormTree};
34
/// Result of evaluating a single script body inside the sandbox.
///
/// This is the `Ok` payload of [`XfaJsRuntime::execute_script`]. It derives
/// `PartialEq`/`Eq` — matching [`SandboxError`] and [`RuntimeMetadata`] — so a
/// complete `Result<RuntimeOutcome, SandboxError>` can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RuntimeOutcome {
    /// True when the script ran to completion inside the sandbox.
    pub executed: bool,
    /// Number of host-tree mutations the script applied via the
    /// (currently empty) host-binding allowlist. Always 0 in Phase B.
    ///
    /// NOTE(review): `RuntimeMetadata` already carries Phase C/D counters;
    /// confirm whether the "always 0" statement is still accurate.
    pub mutated_field_count: usize,
}
44
/// Errors the runtime adapter can emit. Every variant is recoverable
/// at the dispatch site — the parent flatten never aborts because of
/// a sandbox error (S-17 fail-open).
///
/// The enum derives `Clone`, `PartialEq` and `Eq`, so values can be stored
/// and compared directly. The human-readable message of each variant is the
/// string given in its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
pub enum SandboxError {
    /// Cargo feature `xfa-js-sandboxed` not compiled in. Returned by
    /// the [`null::NullRuntime`] for every `execute_script` call.
    #[error("sandboxed runtime not compiled in")]
    NotCompiledIn,

    /// Script body exceeds the per-script size cap (S-11; default 64 KB,
    /// see [`MAX_SCRIPT_BODY_BYTES`]).
    #[error("script body exceeds size cap")]
    BodyTooLarge,

    /// Per-script time budget exceeded (S-9; default 100 ms hard, see
    /// [`DEFAULT_TIME_BUDGET_MS`]).
    #[error("script time budget exceeded")]
    Timeout,

    /// Per-document memory budget exceeded (S-10; default 32 MiB hard, see
    /// [`DEFAULT_MEMORY_BUDGET_BYTES`]).
    #[error("document memory budget exceeded")]
    OutOfMemory,

    /// Call stack depth exceeded the configured maximum (S-12; default 64).
    #[error("call stack overflow")]
    StackOverflow,

    /// Activity not in the runtime allowlist for sandboxed dispatch
    /// (S-14). UI / submission activities skip the runtime entirely
    /// at the dispatch boundary and never reach this error path; this
    /// variant exists for explicit binding-level phase guards.
    ///
    /// The payload is interpolated with `{0:?}`, so it appears in the
    /// message in its `Debug` (quoted) form.
    #[error("activity {0:?} denied for sandbox dispatch")]
    PhaseDenied(String),

    /// Phase B: no host bindings registered. Returned when a script
    /// attempts to read or write any `xfa.*` / `field.*` binding the
    /// adapter has not yet exposed.
    #[error("no host bindings registered (Phase B skeleton)")]
    NoBindings,

    /// FFI panic captured via `std::panic::catch_unwind`. Used by the
    /// rquickjs backend to keep panics from crossing the FFI boundary
    /// into the Rust caller.
    ///
    /// The payload string is appended verbatim to the message.
    #[error("sandbox panic captured: {0}")]
    PanicCaptured(String),

    /// Generic script-level error: parse, runtime, or thrown JS error.
    ///
    /// The payload string is appended verbatim to the message.
    #[error("script error: {0}")]
    ScriptError(String),
}
94
/// Per-document counters gathered while a flatten is in progress.
///
/// A runtime adapter bumps these across successive calls; once the document
/// is finished the dispatch site collects them through
/// [`XfaJsRuntime::take_metadata`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RuntimeMetadata {
    /// Scripts that ran to completion inside the sandbox.
    pub executed: usize,
    /// Generic runtime / script errors (parse, throw, NoBindings, …).
    pub runtime_errors: usize,
    /// Time-budget exhaustions.
    pub timeouts: usize,
    /// Memory-budget exhaustions.
    pub oom: usize,
    /// Phase C host-binding invocations.
    pub host_calls: usize,
    /// Phase C successful `field.rawValue` writes.
    pub mutations: usize,
    /// Phase D successful instanceManager structure writes.
    pub instance_writes: usize,
    /// Phase D-β successful listbox clearItems / addItem writes.
    pub list_writes: usize,
    /// Phase C binding-level failures (type, activity, cap, parse).
    pub binding_errors: usize,
    /// Phase C SOM resolution misses / failures.
    pub resolve_failures: usize,
    /// Phase D-γ successful DataDom reads (children / value / child-by-name).
    pub data_reads: usize,
}

impl RuntimeMetadata {
    /// Reports whether every error-class counter is still zero.
    ///
    /// Only `runtime_errors`, `timeouts`, `oom`, `binding_errors` and
    /// `resolve_failures` are consulted; the success / activity counters
    /// (`executed`, `host_calls`, …) never affect the answer.
    pub fn is_clean(&self) -> bool {
        let error_counters = [
            self.runtime_errors,
            self.timeouts,
            self.oom,
            self.binding_errors,
            self.resolve_failures,
        ];
        error_counters.iter().all(|&count| count == 0)
    }

    /// Folds `other` into `self`, field by field, using saturating addition
    /// so a counter pins at `usize::MAX` instead of wrapping.
    pub fn accumulate(&mut self, other: RuntimeMetadata) {
        // Exhaustive destructuring: adding a field to the struct without
        // handling it here becomes a compile error rather than a silent gap.
        let RuntimeMetadata {
            executed,
            runtime_errors,
            timeouts,
            oom,
            host_calls,
            mutations,
            instance_writes,
            list_writes,
            binding_errors,
            resolve_failures,
            data_reads,
        } = other;

        let pairs = [
            (&mut self.executed, executed),
            (&mut self.runtime_errors, runtime_errors),
            (&mut self.timeouts, timeouts),
            (&mut self.oom, oom),
            (&mut self.host_calls, host_calls),
            (&mut self.mutations, mutations),
            (&mut self.instance_writes, instance_writes),
            (&mut self.list_writes, list_writes),
            (&mut self.binding_errors, binding_errors),
            (&mut self.resolve_failures, resolve_failures),
            (&mut self.data_reads, data_reads),
        ];
        for (total, delta) in pairs {
            *total = total.saturating_add(delta);
        }
    }
}
149
/// Per-script wall-clock allowance, in milliseconds, that the rquickjs
/// backend enforces by default (S-9). Kept here as a constant so tests can
/// refer to it without pulling in the backend module.
pub const DEFAULT_TIME_BUDGET_MS: u64 = 100;

/// Per-document memory allowance, in bytes, that the rquickjs backend
/// enforces by default (S-10): 32 MiB.
pub const DEFAULT_MEMORY_BUDGET_BYTES: usize = 32 << 20;

/// Upper bound on a script body's size in bytes (S-11): 64 KiB. Bodies
/// longer than this are rejected before any parse attempt.
pub const MAX_SCRIPT_BODY_BYTES: usize = 64 << 10;
162
/// Event activities the sandboxed runtime is willing to run.
///
/// Anything else (`click`, `preSubmit`, `mouseEnter`, …) bypasses the
/// runtime at the [`crate::dynamic::apply_dynamic_scripts_with_mode`]
/// boundary, because those activities do not fire during static flatten
/// (S-14).
pub const SANDBOX_ACTIVITY_ALLOWLIST: &[&str] = &[
    "initialize",
    "calculate",
    "validate",
    "docReady",
    "layoutReady",
];

/// Returns `true` only when `activity` is `Some` and its value is an exact
/// (case-sensitive) entry of [`SANDBOX_ACTIVITY_ALLOWLIST`]; `None` is never
/// allowed.
pub fn activity_allowed_for_sandbox(activity: Option<&str>) -> bool {
    match activity {
        Some(name) => SANDBOX_ACTIVITY_ALLOWLIST
            .iter()
            .any(|allowed| *allowed == name),
        None => false,
    }
}
179
/// The host-side adapter the dispatch path calls. A minimal contract
/// chosen so that swapping backends (rquickjs ↔ boa ↔ external sandbox)
/// is one Cargo feature flag away.
///
/// `init`, `reset_for_new_document`, `execute_script` and `take_metadata`
/// are required. The remaining methods have default bodies that do nothing,
/// so a backend without host bindings only implements the required four.
pub trait XfaJsRuntime {
    /// One-time initialisation. Idempotent.
    fn init(&mut self) -> Result<(), SandboxError>;

    /// Reset per-document state (memory budget, instruction counter,
    /// any cached compiled scripts). Called once per flatten.
    fn reset_for_new_document(&mut self) -> Result<(), SandboxError>;

    /// Phase C: install the `FormTree` the runtime should resolve paths
    /// against and mutate. The dispatch path owns the mutable borrow and clears
    /// the handle before returning.
    ///
    /// The default implementation ignores both arguments and returns `Ok(())`.
    ///
    /// NOTE(review): `_form` is a raw pointer but this is a safe `fn`, so the
    /// compiler does not enforce any validity or lifetime requirement on it.
    /// Presumably backends that dereference it rely on the dispatch path
    /// keeping the tree alive until the handle is cleared — confirm against
    /// the backend implementation.
    fn set_form_handle(
        &mut self,
        _form: *mut FormTree,
        _root_id: FormNodeId,
    ) -> Result<(), SandboxError> {
        Ok(())
    }

    /// Phase D-γ: install a read-only view of the `DataDom` for the current
    /// document. Called once per document after `set_form_handle`, before any
    /// scripts run. Default: no-op (backends without DataDom support ignore it).
    ///
    /// # Safety
    /// Callers **must** guarantee that `dom` outlives all script execution for
    /// this document (i.e. it must remain alive until `set_form_handle(null)`
    /// is called). The runtime stores the pointer read-only and never writes
    /// through it.
    ///
    /// This method is not declared `unsafe fn`, so the requirement above is a
    /// documented convention only; nothing in the signature enforces it.
    /// `set_form_handle(null)` is shorthand: the real method also takes a
    /// `FormNodeId` — presumably this means passing a null `FormTree` pointer;
    /// verify against the dispatch site.
    fn set_data_handle(&mut self, _dom: *const DataDom) {}

    /// Phase C: reset per-script host counters and install the current script
    /// context node / activity. Backends without host bindings ignore it.
    ///
    /// The default implementation ignores both arguments and returns `Ok(())`.
    fn reset_per_script(
        &mut self,
        _current_id: FormNodeId,
        _activity: Option<&str>,
    ) -> Result<(), SandboxError> {
        Ok(())
    }

    /// Phase C page-count foundation. The current flatten order runs scripts
    /// before layout, so callers normally leave this at 0.
    ///
    /// The default implementation ignores `_page_count` and returns `Ok(())`.
    fn set_static_page_count(&mut self, _page_count: u32) -> Result<(), SandboxError> {
        Ok(())
    }

    /// Execute one script body inside the sandbox.
    ///
    /// `activity` is the enclosing `<event activity="...">` value if
    /// any. The dispatch site has already filtered against
    /// [`activity_allowed_for_sandbox`]; backends may treat unknown
    /// activities as `PhaseDenied` for defence-in-depth.
    ///
    /// `body` is the script source text. On success the backend returns a
    /// [`RuntimeOutcome`]; any failure is reported as a [`SandboxError`].
    fn execute_script(
        &mut self,
        activity: Option<&str>,
        body: &str,
    ) -> Result<RuntimeOutcome, SandboxError>;

    /// Take the cumulative metadata since the last `take_metadata`
    /// call (or since `reset_for_new_document`, whichever was later).
    fn take_metadata(&mut self) -> RuntimeMetadata;
}
245
/// Unit tests for the backend-independent parts of the adapter: the
/// activity allowlist, `RuntimeMetadata` bookkeeping, and the budget
/// constants.
#[cfg(test)]
mod tests {
    use super::*;

    /// The five flatten-time activities are accepted by name.
    #[test]
    fn allowlist_accepts_initialize_and_calculate() {
        assert!(activity_allowed_for_sandbox(Some("initialize")));
        assert!(activity_allowed_for_sandbox(Some("calculate")));
        assert!(activity_allowed_for_sandbox(Some("validate")));
        assert!(activity_allowed_for_sandbox(Some("docReady")));
        assert!(activity_allowed_for_sandbox(Some("layoutReady")));
    }

    /// Keeps the predicate and the constant in lock-step: whatever is listed
    /// in `SANDBOX_ACTIVITY_ALLOWLIST` must be accepted.
    #[test]
    fn allowlist_accepts_every_listed_activity() {
        for &activity in SANDBOX_ACTIVITY_ALLOWLIST {
            assert!(
                activity_allowed_for_sandbox(Some(activity)),
                "{activity} is listed and must be allowed",
            );
        }
    }

    /// UI / submission activities, and a missing activity, are rejected.
    #[test]
    fn allowlist_rejects_ui_and_submit_activities() {
        for ui in [
            "click",
            "mouseEnter",
            "mouseExit",
            "enter",
            "exit",
            "preSubmit",
            "postSubmit",
            "ready",
        ] {
            assert!(
                !activity_allowed_for_sandbox(Some(ui)),
                "{ui} must not be allowed",
            );
        }
        assert!(!activity_allowed_for_sandbox(None));
    }

    /// A default snapshot is clean; success counters do not change that,
    /// an error counter does.
    #[test]
    fn metadata_is_clean_when_zero() {
        assert!(RuntimeMetadata::default().is_clean());
        // Struct-update syntax instead of mutate-after-default
        // (clippy::field_reassign_with_default).
        let m = RuntimeMetadata {
            executed: 5,
            ..RuntimeMetadata::default()
        };
        assert!(m.is_clean(), "executed counter does not flip cleanliness");
        let m = RuntimeMetadata {
            runtime_errors: 1,
            ..m
        };
        assert!(!m.is_clean());
    }

    /// Each of the five error-class counters flips `is_clean` on its own,
    /// while the activity counters never do.
    #[test]
    fn metadata_each_error_counter_flips_cleanliness() {
        let zero = RuntimeMetadata::default();
        let dirty = [
            RuntimeMetadata { runtime_errors: 1, ..zero },
            RuntimeMetadata { timeouts: 1, ..zero },
            RuntimeMetadata { oom: 1, ..zero },
            RuntimeMetadata { binding_errors: 1, ..zero },
            RuntimeMetadata { resolve_failures: 1, ..zero },
        ];
        for m in &dirty {
            assert!(!m.is_clean(), "{m:?} must not be clean");
        }

        let busy_but_clean = RuntimeMetadata {
            executed: 1,
            host_calls: 1,
            mutations: 1,
            instance_writes: 1,
            list_writes: 1,
            data_reads: 1,
            ..zero
        };
        assert!(busy_but_clean.is_clean());
    }

    /// `accumulate` adds field by field and saturates instead of wrapping.
    #[test]
    fn metadata_accumulate_sums_and_saturates() {
        let mut total = RuntimeMetadata {
            executed: 1,
            timeouts: usize::MAX,
            ..RuntimeMetadata::default()
        };
        total.accumulate(RuntimeMetadata {
            executed: 2,
            timeouts: 1,
            data_reads: 3,
            ..RuntimeMetadata::default()
        });
        assert_eq!(
            total,
            RuntimeMetadata {
                executed: 3,
                timeouts: usize::MAX,
                data_reads: 3,
                ..RuntimeMetadata::default()
            }
        );
    }

    /// Guards against someone shrinking the budgets to unusable values.
    #[test]
    fn budget_constants_are_sane() {
        assert!(MAX_SCRIPT_BODY_BYTES >= 4096);
        assert!(DEFAULT_TIME_BUDGET_MS >= 25);
        assert!(DEFAULT_MEMORY_BUDGET_BYTES >= 1024 * 1024);
    }
}
295}