pdf_xfa/js_runtime/mod.rs
1//! M3-B Phase B — JavaScript runtime adapter (skeleton).
2//!
3//! This module is the integration boundary between `crates/pdf-xfa`'s
4//! flatten pipeline and a sandboxed JavaScript runtime. Phase B ships
5//! the boundary plus a `NullRuntime` stub. The rquickjs-backed runtime
6//! is gated behind the `xfa-js-sandboxed` Cargo feature and registers
7//! **no host bindings** — Phase C adds the first useful set per
8//! `benchmarks/runs/M3B_HOST_BINDINGS_MINIMUM_SET.md`.
9//!
10//! The default behaviour of [`crate::dynamic::apply_dynamic_scripts`]
11//! and `flatten_xfa_to_pdf` is unchanged: with the feature off and
12//! mode `BestEffortStatic`, the runtime is never invoked. Adding the
13//! adapter is intentionally behaviour-neutral.
14//!
15//! See `benchmarks/runs/M3B_RUNTIME_SECURITY_MODEL.md` for the 18
16//! invariants the adapter must respect (S-1..S-18).
17
18pub mod host;
19pub mod null;
20
21#[cfg(feature = "xfa-js-sandboxed")]
22pub mod rquickjs_backend;
23
24pub use host::{
25 HostBindings, MutationLogEntry, MAX_INSTANCES_PER_SUBFORM, MAX_ITEMS_PER_LISTBOX,
26 MAX_MUTATIONS_PER_DOC, MAX_RESOLVE_CALLS_PER_SCRIPT, MAX_RESOLVE_RESULTS, MAX_SOM_DEPTH,
27};
28pub use null::NullRuntime;
29#[cfg(feature = "xfa-js-sandboxed")]
30pub use rquickjs_backend::QuickJsRuntime;
31
32use xfa_dom_resolver::data_dom::DataDom;
33use xfa_layout_engine::form::{FormNodeId, FormTree};
34
/// Result of evaluating a single script body inside the sandbox.
///
/// Derives `PartialEq`/`Eq` in line with the sibling [`SandboxError`] and
/// [`RuntimeMetadata`] types, so a complete
/// `Result<RuntimeOutcome, SandboxError>` can be compared directly in
/// assertions.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RuntimeOutcome {
    /// True when the script ran to completion inside the sandbox.
    pub executed: bool,
    /// Number of host-tree mutations the script applied via the
    /// host-binding allowlist. Always 0 in Phase B, where that
    /// allowlist is still empty.
    pub mutated_field_count: usize,
}
44
45/// Errors the runtime adapter can emit. Every variant is recoverable
46/// at the dispatch site — the parent flatten never aborts because of
47/// a sandbox error (S-17 fail-open).
48#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
49pub enum SandboxError {
50 /// Cargo feature `xfa-js-sandboxed` not compiled in. Returned by
51 /// the [`null::NullRuntime`] for every `execute_script` call.
52 #[error("sandboxed runtime not compiled in")]
53 NotCompiledIn,
54
55 /// Script body exceeds the per-script size cap (S-11; default 64 KB).
56 #[error("script body exceeds size cap")]
57 BodyTooLarge,
58
59 /// Per-script time budget exceeded (S-9; default 100 ms hard).
60 #[error("script time budget exceeded")]
61 Timeout,
62
63 /// Per-document memory budget exceeded (S-10; default 32 MiB hard).
64 #[error("document memory budget exceeded")]
65 OutOfMemory,
66
67 /// Call stack depth exceeded the configured maximum (S-12; default 64).
68 #[error("call stack overflow")]
69 StackOverflow,
70
71 /// Activity not in the runtime allowlist for sandboxed dispatch
72 /// (S-14). UI / submission activities skip the runtime entirely
73 /// at the dispatch boundary and never reach this error path; this
74 /// variant exists for explicit binding-level phase guards.
75 #[error("activity {0:?} denied for sandbox dispatch")]
76 PhaseDenied(String),
77
78 /// Phase B: no host bindings registered. Returned when a script
79 /// attempts to read or write any `xfa.*` / `field.*` binding the
80 /// adapter has not yet exposed.
81 #[error("no host bindings registered (Phase B skeleton)")]
82 NoBindings,
83
84 /// FFI panic captured via `std::panic::catch_unwind`. Used by the
85 /// rquickjs backend to keep panics from crossing the FFI boundary
86 /// into the Rust caller.
87 #[error("sandbox panic captured: {0}")]
88 PanicCaptured(String),
89
90 /// Generic script-level error: parse, runtime, or thrown JS error.
91 #[error("script error: {0}")]
92 ScriptError(String),
93}
94
/// Per-document counters gathered while flattening.
///
/// The runtime adapter bumps these across calls; once the document is
/// finished the dispatch site collects them through
/// [`XfaJsRuntime::take_metadata`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RuntimeMetadata {
    /// Count of scripts that ran to completion inside the sandbox.
    pub executed: usize,
    /// Count of generic runtime / script errors (parse, throw, NoBindings, …).
    pub runtime_errors: usize,
    /// Count of time-budget exhaustions.
    pub timeouts: usize,
    /// Count of memory-budget exhaustions.
    pub oom: usize,
    /// Phase C: host-binding invocations.
    pub host_calls: usize,
    /// Phase C: successful `field.rawValue` writes.
    pub mutations: usize,
    /// Phase D: successful instanceManager structure writes.
    pub instance_writes: usize,
    /// Phase D-β: successful listbox clearItems / addItem writes.
    pub list_writes: usize,
    /// Phase C: binding-level failures (type, activity, cap, parse).
    pub binding_errors: usize,
    /// Phase C: SOM resolution misses / failures.
    pub resolve_failures: usize,
    /// Phase D-γ: successful DataDom reads (children / value / child-by-name).
    pub data_reads: usize,
}

impl RuntimeMetadata {
    /// Returns `true` when every error-class counter is zero.
    ///
    /// Only the five error counters are inspected; activity counters such
    /// as `executed` or `host_calls` have no influence on the result.
    pub fn is_clean(&self) -> bool {
        [
            self.runtime_errors,
            self.timeouts,
            self.oom,
            self.binding_errors,
            self.resolve_failures,
        ]
        .iter()
        .all(|&count| count == 0)
    }

    /// Folds `other` into `self`, counter by counter, saturating at
    /// `usize::MAX` instead of overflowing.
    pub fn accumulate(&mut self, other: RuntimeMetadata) {
        // Destructure exhaustively so that a newly added field which is
        // not folded in below fails to compile rather than being dropped.
        let RuntimeMetadata {
            executed,
            runtime_errors,
            timeouts,
            oom,
            host_calls,
            mutations,
            instance_writes,
            list_writes,
            binding_errors,
            resolve_failures,
            data_reads,
        } = other;

        let pairs = [
            (&mut self.executed, executed),
            (&mut self.runtime_errors, runtime_errors),
            (&mut self.timeouts, timeouts),
            (&mut self.oom, oom),
            (&mut self.host_calls, host_calls),
            (&mut self.mutations, mutations),
            (&mut self.instance_writes, instance_writes),
            (&mut self.list_writes, list_writes),
            (&mut self.binding_errors, binding_errors),
            (&mut self.resolve_failures, resolve_failures),
            (&mut self.data_reads, data_reads),
        ];
        for (total, delta) in pairs {
            *total = total.saturating_add(delta);
        }
    }
}
149
/// Wall-clock budget, in milliseconds, that the rquickjs backend grants
/// each script by default (S-9). Kept here as a constant so tests can
/// reason about it without depending on the backend module.
pub const DEFAULT_TIME_BUDGET_MS: u64 = 100;

/// Memory budget, in bytes, that the rquickjs backend grants each
/// document by default (S-10): 32 MiB.
pub const DEFAULT_MEMORY_BUDGET_BYTES: usize = 32 * 1024 * 1024;

/// Upper bound, in bytes, on a script body (S-11): 64 KiB. Longer bodies
/// are rejected before any parse attempt.
pub const MAX_SCRIPT_BODY_BYTES: usize = 64 * 1024;
162
/// Event activities that the sandboxed runtime accepts for dispatch.
///
/// Anything else (`click`, `preSubmit`, `mouseEnter`, …) bypasses the
/// runtime at the [`crate::dynamic::apply_dynamic_scripts_with_mode`]
/// boundary, because such activities do not fire during static flatten
/// (S-14).
pub const SANDBOX_ACTIVITY_ALLOWLIST: &[&str] = &[
    "initialize",
    "calculate",
    "validate",
    "docReady",
    "layoutReady",
];

/// Reports whether `activity` names an entry of
/// [`SANDBOX_ACTIVITY_ALLOWLIST`].
///
/// The comparison is an exact string match; a missing activity (`None`)
/// is never allowed.
pub fn activity_allowed_for_sandbox(activity: Option<&str>) -> bool {
    match activity {
        Some(name) => SANDBOX_ACTIVITY_ALLOWLIST
            .iter()
            .any(|&allowed| allowed == name),
        None => false,
    }
}
179
180/// The host-side adapter the dispatch path calls. A minimal contract
181/// chosen so that swapping backends (rquickjs ↔ boa ↔ external sandbox)
182/// is one Cargo feature flag away.
183pub trait XfaJsRuntime {
184 /// One-time initialisation. Idempotent.
185 fn init(&mut self) -> Result<(), SandboxError>;
186
187 /// Reset per-document state (memory budget, instruction counter,
188 /// any cached compiled scripts). Called once per flatten.
189 fn reset_for_new_document(&mut self) -> Result<(), SandboxError>;
190
191 /// Phase C: install the `FormTree` the runtime should resolve paths
192 /// against and mutate. The dispatch path owns the mutable borrow and clears
193 /// the handle before returning.
194 fn set_form_handle(
195 &mut self,
196 _form: *mut FormTree,
197 _root_id: FormNodeId,
198 ) -> Result<(), SandboxError> {
199 Ok(())
200 }
201
202 /// Phase D-γ: install a read-only view of the `DataDom` for the current
203 /// document. Called once per document after `set_form_handle`, before any
204 /// scripts run. Default: no-op (backends without DataDom support ignore it).
205 ///
206 /// # Safety
207 /// Callers **must** guarantee that `dom` outlives all script execution for
208 /// this document (i.e. it must remain alive until `set_form_handle(null)`
209 /// is called). The runtime stores the pointer read-only and never writes
210 /// through it.
211 fn set_data_handle(&mut self, _dom: *const DataDom) {}
212
213 /// Phase C: reset per-script host counters and install the current script
214 /// context node / activity. Backends without host bindings ignore it.
215 fn reset_per_script(
216 &mut self,
217 _current_id: FormNodeId,
218 _activity: Option<&str>,
219 ) -> Result<(), SandboxError> {
220 Ok(())
221 }
222
223 /// Phase C page-count foundation. The current flatten order runs scripts
224 /// before layout, so callers normally leave this at 0.
225 fn set_static_page_count(&mut self, _page_count: u32) -> Result<(), SandboxError> {
226 Ok(())
227 }
228
229 /// Execute one script body inside the sandbox.
230 ///
231 /// `activity` is the enclosing `<event activity="...">` value if
232 /// any. The dispatch site has already filtered against
233 /// [`activity_allowed_for_sandbox`]; backends may treat unknown
234 /// activities as `PhaseDenied` for defence-in-depth.
235 fn execute_script(
236 &mut self,
237 activity: Option<&str>,
238 body: &str,
239 ) -> Result<RuntimeOutcome, SandboxError>;
240
241 /// Take the cumulative metadata since the last `take_metadata`
242 /// call (or since `reset_for_new_document`, whichever was later).
243 fn take_metadata(&mut self) -> RuntimeMetadata;
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Every activity on the allowlist must be accepted.
    #[test]
    fn allowlist_accepts_initialize_and_calculate() {
        assert!(activity_allowed_for_sandbox(Some("initialize")));
        assert!(activity_allowed_for_sandbox(Some("calculate")));
        assert!(activity_allowed_for_sandbox(Some("validate")));
        assert!(activity_allowed_for_sandbox(Some("docReady")));
        assert!(activity_allowed_for_sandbox(Some("layoutReady")));
    }

    /// UI / submission activities and a missing activity must be rejected.
    #[test]
    fn allowlist_rejects_ui_and_submit_activities() {
        for ui in [
            "click",
            "mouseEnter",
            "mouseExit",
            "enter",
            "exit",
            "preSubmit",
            "postSubmit",
            "ready",
        ] {
            assert!(
                !activity_allowed_for_sandbox(Some(ui)),
                "{ui} must not be allowed",
            );
        }
        assert!(!activity_allowed_for_sandbox(None));
    }

    /// Only error-class counters affect `is_clean`.
    #[test]
    fn metadata_is_clean_when_zero() {
        assert!(RuntimeMetadata::default().is_clean());

        // Struct-update syntax instead of default-then-assign
        // (clippy::field_reassign_with_default).
        let executed_only = RuntimeMetadata {
            executed: 5,
            ..RuntimeMetadata::default()
        };
        assert!(
            executed_only.is_clean(),
            "executed counter does not flip cleanliness"
        );

        let with_error = RuntimeMetadata {
            runtime_errors: 1,
            ..executed_only
        };
        assert!(!with_error.is_clean());
    }

    /// `accumulate` adds counters field by field and saturates at
    /// `usize::MAX` rather than overflowing.
    #[test]
    fn metadata_accumulate_sums_and_saturates() {
        let mut total = RuntimeMetadata {
            executed: usize::MAX,
            host_calls: 2,
            ..RuntimeMetadata::default()
        };
        total.accumulate(RuntimeMetadata {
            executed: 1,
            host_calls: 3,
            data_reads: 4,
            ..RuntimeMetadata::default()
        });
        assert_eq!(total.executed, usize::MAX);
        assert_eq!(total.host_calls, 5);
        assert_eq!(total.data_reads, 4);
        assert!(total.is_clean());
    }

    /// Guards against the budgets being lowered to unusable values.
    #[test]
    fn budget_constants_are_sane() {
        assert!(MAX_SCRIPT_BODY_BYTES >= 4096);
        assert!(DEFAULT_TIME_BUDGET_MS >= 25);
        assert!(DEFAULT_MEMORY_BUDGET_BYTES >= 1024 * 1024);
    }
}