pdf_xfa/js_runtime/mod.rs
1//! M3-B Phase B — JavaScript runtime adapter (skeleton).
2//!
3//! This module is the integration boundary between `crates/pdf-xfa`'s
4//! flatten pipeline and a sandboxed JavaScript runtime. Phase B ships
5//! the boundary plus a `NullRuntime` stub. The rquickjs-backed runtime
6//! is gated behind the `xfa-js-sandboxed` Cargo feature and registers
7//! **no host bindings** — Phase C adds the first useful set per
8//! `benchmarks/runs/M3B_HOST_BINDINGS_MINIMUM_SET.md`.
9//!
10//! The default behaviour of [`crate::dynamic::apply_dynamic_scripts`]
11//! and `flatten_xfa_to_pdf` is unchanged: with the feature off and
12//! mode `BestEffortStatic`, the runtime is never invoked. Adding the
13//! adapter is intentionally behaviour-neutral.
14//!
15//! See `benchmarks/runs/M3B_RUNTIME_SECURITY_MODEL.md` for the 18
16//! invariants the adapter must respect (S-1..S-18).
17
18pub mod host;
19pub mod null;
20pub mod regex_guard;
21
22#[cfg(feature = "xfa-js-sandboxed")]
23pub mod rquickjs_backend;
24
25pub use host::{
26 HostBindings, MutationLogEntry, MAX_INSTANCES_PER_SUBFORM, MAX_ITEMS_PER_LISTBOX,
27 MAX_MUTATIONS_PER_DOC, MAX_RESOLVE_CALLS_PER_SCRIPT, MAX_RESOLVE_RESULTS, MAX_SOM_DEPTH,
28};
29pub use null::NullRuntime;
30#[cfg(feature = "xfa-js-sandboxed")]
31pub use rquickjs_backend::QuickJsRuntime;
32
33use xfa_dom_resolver::data_dom::DataDom;
34use xfa_layout_engine::form::{FormNodeId, FormTree};
35
/// What happened when a single script body was handed to the sandbox.
#[derive(Debug, Clone, Default)]
pub struct RuntimeOutcome {
    /// `true` if the script ran to completion inside the sandbox.
    pub executed: bool,
    /// How many host-tree mutations the script applied through the
    /// host-binding allowlist. That allowlist is empty in Phase B, so the
    /// value is always 0 there.
    pub mutated_field_count: usize,
}
45
46/// Errors the runtime adapter can emit. Every variant is recoverable
47/// at the dispatch site — the parent flatten never aborts because of
48/// a sandbox error (S-17 fail-open).
49#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
50pub enum SandboxError {
51 /// Cargo feature `xfa-js-sandboxed` not compiled in. Returned by
52 /// the [`null::NullRuntime`] for every `execute_script` call.
53 #[error("sandboxed runtime not compiled in")]
54 NotCompiledIn,
55
56 /// Script body exceeds the per-script size cap (S-11; default 64 KB).
57 #[error("script body exceeds size cap")]
58 BodyTooLarge,
59
60 /// Per-script time budget exceeded (S-9; default 100 ms hard).
61 #[error("script time budget exceeded")]
62 Timeout,
63
64 /// Per-document memory budget exceeded (S-10; default 32 MiB hard).
65 #[error("document memory budget exceeded")]
66 OutOfMemory,
67
68 /// Call stack depth exceeded the configured maximum (S-12; default 64).
69 #[error("call stack overflow")]
70 StackOverflow,
71
72 /// Activity not in the runtime allowlist for sandboxed dispatch
73 /// (S-14). UI / submission activities skip the runtime entirely
74 /// at the dispatch boundary and never reach this error path; this
75 /// variant exists for explicit binding-level phase guards.
76 #[error("activity {0:?} denied for sandbox dispatch")]
77 PhaseDenied(String),
78
79 /// Phase B: no host bindings registered. Returned when a script
80 /// attempts to read or write any `xfa.*` / `field.*` binding the
81 /// adapter has not yet exposed.
82 #[error("no host bindings registered (Phase B skeleton)")]
83 NoBindings,
84
85 /// FFI panic captured via `std::panic::catch_unwind`. Used by the
86 /// rquickjs backend to keep panics from crossing the FFI boundary
87 /// into the Rust caller.
88 #[error("sandbox panic captured: {0}")]
89 PanicCaptured(String),
90
91 /// Generic script-level error: parse, runtime, or thrown JS error.
92 #[error("script error: {0}")]
93 ScriptError(String),
94
95 /// W3-A — REDOS-01 mitigation: the script body contains a regex
96 /// pattern shape known to cause catastrophic backtracking in QuickJS's
97 /// NFA-based engine (e.g. `(a+)+$`). The body is rejected before
98 /// reaching the sandbox to bound CPU time. See
99 /// `crates/pdf-xfa/src/js_runtime/regex_guard.rs` for the heuristic
100 /// catalogue.
101 #[error("regex rejected by ReDoS guard: {0}")]
102 RegexRejected(String),
103
104 /// QF1-E / SEC-01 — defence-in-depth wall-time fallback. The primary
105 /// per-script time budget is enforced via the rquickjs interrupt
106 /// handler that polls at JS opcode boundaries (see
107 /// [`SandboxError::Timeout`]). When the interrupt callback fails to
108 /// fire for an extended period — for example because execution is
109 /// trapped inside a single C-level call (regex, JSON.parse on a
110 /// pathological input, host binding routine) that does not yield
111 /// opcode boundaries — the wall-clock can drift well beyond the
112 /// configured budget. This variant is emitted when total elapsed
113 /// time crossed the fallback threshold
114 /// ([`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`] × the configured
115 /// time budget). It is strictly a *post-hoc classification*:
116 /// the primary interrupt path remains in charge of actually aborting
117 /// script execution; this variant simply re-labels the error so
118 /// observability can distinguish "clean stop at budget" from
119 /// "stop dragged past 5×". See
120 /// `crates/pdf-xfa/src/js_runtime/rquickjs_backend.rs` and the
121 /// QF1_E report under `benchmarks/runs/xfa_enterprise_plan/quality_factory_v1/`.
122 #[error("wall-time fallback fired: elapsed {0}")]
123 WallTimeExceeded(String),
124}
125
/// Cumulative counters for one document's flatten.
///
/// The runtime adapter bumps these across calls; the dispatch site collects
/// them through [`XfaJsRuntime::take_metadata`] once the document is done.
/// Every field is a plain event count, and [`RuntimeMetadata::accumulate`]
/// merges two snapshots field by field.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RuntimeMetadata {
    /// Scripts that ran to completion inside the sandbox.
    pub executed: usize,
    /// Generic runtime / script errors (parse, throw, NoBindings, …).
    pub runtime_errors: usize,
    /// Time-budget exhaustions.
    pub timeouts: usize,
    /// Memory-budget exhaustions.
    pub oom: usize,
    /// Phase C host-binding invocations.
    pub host_calls: usize,
    /// Phase C successful `field.rawValue` writes.
    pub mutations: usize,
    /// Phase D successful instanceManager structure writes.
    pub instance_writes: usize,
    /// Phase D-β successful listbox clearItems / addItem writes.
    pub list_writes: usize,
    /// Phase C binding-level failures (type, activity, cap, parse).
    pub binding_errors: usize,
    /// Phase C SOM resolution misses / failures.
    pub resolve_failures: usize,
    /// Phase D-γ successful DataDom reads (children / value / child-by-name).
    pub data_reads: usize,
    /// Phase E (XFA-JS-HOST-STUBS): calls into host capabilities that need
    /// genuine viewer / user interaction (UI dialogs, signature panels,
    /// network submit). The sandbox cannot honestly satisfy these during a
    /// non-interactive flatten; instead of raising a `TypeError` (which would
    /// abort the script and inflate [`runtime_errors`](Self::runtime_errors))
    /// the stubs return a safe default value and increment this counter so
    /// the dispatch site keeps observability of "would-have-been-interactive"
    /// touch points. This counter is intentionally NOT folded into
    /// [`is_clean`](Self::is_clean): a script that touched
    /// `xfa.host.messageBox` still ran cleanly because the sandbox did not
    /// error. Embedders that care about UI gaps should inspect this field
    /// explicitly.
    pub unsupported_host_calls: usize,
    /// Phase D-θ.2 probe calls skipped because `parentIds.length == 1 &&
    /// chain.length == 1` (no same-name ambiguity possible). Every skipped
    /// call saves one `resolveWithFullChainStrict` host round-trip.
    pub probe_skips: usize,
    /// D3 (trace-only): `<variables>` `<script>` objects collected from the
    /// template for this document (root + subform scopes).
    pub variables_scripts_collected: usize,
    /// D3 (trace-only): `<variables>` `<text>` data items collected.
    pub variables_data_items_collected: usize,
    /// D3 (trace-only): script objects whose JS-side registration returned
    /// success (namespace bound into `variablesScripts` / `subformVariables`).
    pub script_objects_registered: usize,
    /// D3 (trace-only): script objects that did NOT register — either a Rust
    /// skip (`BodyTooLarge` / `RegexRejected` / panic) or a JS-side eval
    /// failure (`setVariablesScript` returned `false`). Pure observability;
    /// never folded into [`is_clean`](Self::is_clean) or rollback.
    pub script_objects_register_failed: usize,
    /// D3 (trace-only): script objects collected under a NESTED subform scope
    /// (registered into `subformVariables` only, hence not reachable as a bare
    /// identifier today — the "scope_hidden" gap class).
    pub script_objects_subform_scoped: usize,
    /// D4: total SOM lookups observed at the host resolve boundary (successes
    /// + failures across the instrumented `resolve_*` entry points).
    pub som_lookups_total: usize,
    /// D4: SOM lookups that resolved to at least one node.
    pub som_lookup_successes: usize,
    /// D4: SOM lookups that returned NoMatch.
    pub som_lookup_failures: usize,
    /// D4: subform-scoped script-object names NOT exposed because the same
    /// name is declared by ≥2 subforms (fail-closed ambiguity).
    pub som_lookup_ambiguous: usize,
    /// D4: subform-scoped script objects exposed to bare-identifier lookup
    /// (unique-name, sandboxed-only).
    pub som_subform_scripts_exposed: usize,
    /// D4 (trace-only): SOM NoMatch references whose path is an `occur` path
    /// (`occur` / `occur.min` / `occur.max` …). Classified, NOT resolved.
    pub som_occur_path_refs: usize,
    /// D5: `node.occur` handle accesses (successes + failures).
    pub occur_lookups_total: usize,
    /// D5: `node.occur` accesses where the node handle was live.
    pub occur_lookup_successes: usize,
    /// D5: `node.occur` accesses where the node handle was not live.
    pub occur_lookup_failures: usize,
    /// D5: reads of an `occur` property (`min`/`max`/`initial`).
    pub occur_property_reads: usize,
    /// D5: writes to an `occur` property (captured, not applied).
    pub occur_property_writes: usize,
    /// D5: writes specifically to `occur.min`.
    pub occur_min_writes: usize,
    /// D5: writes specifically to `occur.max`.
    pub occur_max_writes: usize,
    /// D5: occur mutations captured as intent (no layout effect).
    pub occur_mutations_captured: usize,
    /// D5: occur mutations APPLIED to layout. **Always 0 in D5**
    /// (capture-only); D6 bumps this when `XFA_OCCUR_APPLY=1` applies a
    /// captured `occur.min`.
    pub occur_mutations_applied: usize,
    /// D6: captured occur mutations NOT applied (rollback, apply-flag off,
    /// dead/non-repeatable target, unsupported prop, negative value).
    pub occur_mutations_skipped: usize,
    /// D6: captured occur mutations skipped because the target node is not a
    /// repeatable container (Subform/Area/ExclGroup) — fail-closed.
    pub occur_application_ambiguous: usize,
    /// D6: distinct form nodes whose occur was applied.
    pub occur_application_targets: usize,
    /// BE-1: `$data` bare-global intercepts resolved successfully (JS layer).
    pub som_data_root_hits: usize,
    /// BE-1: `#items` property accesses resolved to a non-empty item list.
    pub som_items_path_hits: usize,
}

impl RuntimeMetadata {
    /// Returns `true` when no error-class counter is set.
    ///
    /// Only `runtime_errors`, `timeouts`, `oom`, `binding_errors` and
    /// `resolve_failures` are consulted. Every other counter (including
    /// `unsupported_host_calls` and the trace-only D3–D6 / BE-1 counters)
    /// is observability data and does not affect the result.
    pub fn is_clean(&self) -> bool {
        self.runtime_errors == 0
            && self.timeouts == 0
            && self.oom == 0
            && self.binding_errors == 0
            && self.resolve_failures == 0
    }

    /// Folds `other` into `self`, adding each counter with saturation so a
    /// pathological document cannot wrap a counter back to a small value.
    pub fn accumulate(&mut self, other: RuntimeMetadata) {
        // Exhaustive destructuring (no `..`) is deliberate: adding a field to
        // the struct without folding it here becomes a compile error (or an
        // unused-binding warning) instead of a silently dropped counter.
        let RuntimeMetadata {
            executed,
            runtime_errors,
            timeouts,
            oom,
            host_calls,
            mutations,
            instance_writes,
            list_writes,
            binding_errors,
            resolve_failures,
            data_reads,
            unsupported_host_calls,
            probe_skips,
            variables_scripts_collected,
            variables_data_items_collected,
            script_objects_registered,
            script_objects_register_failed,
            script_objects_subform_scoped,
            som_lookups_total,
            som_lookup_successes,
            som_lookup_failures,
            som_lookup_ambiguous,
            som_subform_scripts_exposed,
            som_occur_path_refs,
            occur_lookups_total,
            occur_lookup_successes,
            occur_lookup_failures,
            occur_property_reads,
            occur_property_writes,
            occur_min_writes,
            occur_max_writes,
            occur_mutations_captured,
            occur_mutations_applied,
            occur_mutations_skipped,
            occur_application_ambiguous,
            occur_application_targets,
            som_data_root_hits,
            som_items_path_hits,
        } = other;

        // Pair each running total with the matching increment from `other`.
        for (total, delta) in [
            (&mut self.executed, executed),
            (&mut self.runtime_errors, runtime_errors),
            (&mut self.timeouts, timeouts),
            (&mut self.oom, oom),
            (&mut self.host_calls, host_calls),
            (&mut self.mutations, mutations),
            (&mut self.instance_writes, instance_writes),
            (&mut self.list_writes, list_writes),
            (&mut self.binding_errors, binding_errors),
            (&mut self.resolve_failures, resolve_failures),
            (&mut self.data_reads, data_reads),
            (&mut self.unsupported_host_calls, unsupported_host_calls),
            (&mut self.probe_skips, probe_skips),
            (&mut self.variables_scripts_collected, variables_scripts_collected),
            (&mut self.variables_data_items_collected, variables_data_items_collected),
            (&mut self.script_objects_registered, script_objects_registered),
            (&mut self.script_objects_register_failed, script_objects_register_failed),
            (&mut self.script_objects_subform_scoped, script_objects_subform_scoped),
            (&mut self.som_lookups_total, som_lookups_total),
            (&mut self.som_lookup_successes, som_lookup_successes),
            (&mut self.som_lookup_failures, som_lookup_failures),
            (&mut self.som_lookup_ambiguous, som_lookup_ambiguous),
            (&mut self.som_subform_scripts_exposed, som_subform_scripts_exposed),
            (&mut self.som_occur_path_refs, som_occur_path_refs),
            (&mut self.occur_lookups_total, occur_lookups_total),
            (&mut self.occur_lookup_successes, occur_lookup_successes),
            (&mut self.occur_lookup_failures, occur_lookup_failures),
            (&mut self.occur_property_reads, occur_property_reads),
            (&mut self.occur_property_writes, occur_property_writes),
            (&mut self.occur_min_writes, occur_min_writes),
            (&mut self.occur_max_writes, occur_max_writes),
            (&mut self.occur_mutations_captured, occur_mutations_captured),
            (&mut self.occur_mutations_applied, occur_mutations_applied),
            (&mut self.occur_mutations_skipped, occur_mutations_skipped),
            (&mut self.occur_application_ambiguous, occur_application_ambiguous),
            (&mut self.occur_application_targets, occur_application_targets),
            (&mut self.som_data_root_hits, som_data_root_hits),
            (&mut self.som_items_path_hits, som_items_path_hits),
        ] {
            *total = total.saturating_add(delta);
        }
    }
}
336
337/// Epic A E-2/E-3: verbose per-entry diagnostic logs drained alongside
338/// [`RuntimeMetadata`] from [`HostBindings`]. Empty unless
339/// `XFA_RUNTIME_DIAG=1` is set at the HostBindings call sites.
340#[derive(Debug, Default)]
341pub struct RuntimeDiagLogs {
342 /// E-2: SOM resolution misses (capped at 200).
343 pub som_fail_log: Vec<crate::dynamic::SomFailEntry>,
344 /// E-3: instanceManager write events (capped at 200).
345 pub instance_write_log: Vec<crate::dynamic::InstanceWriteEntry>,
346}
347
/// Default wall-clock budget, in milliseconds, that the rquickjs backend
/// grants each script (S-9). It is a module-level constant so tests can
/// reason about the budget without depending on the backend module.
pub const DEFAULT_TIME_BUDGET_MS: u64 = 100;
352
/// QF1-E / SEC-01 — default multiplier for the worker-level wall-time
/// fallback.
///
/// Timeouts are primarily enforced by the rquickjs interrupt callback,
/// which is polled at JS opcode boundaries. When a script's elapsed
/// wall-clock time exceeds this multiplier × the configured time budget,
/// the backend labels the resulting error [`SandboxError::WallTimeExceeded`]
/// rather than [`SandboxError::Timeout`].
///
/// With the default of `5`, an interrupt that fires within ≤ 5× the
/// configured budget (the normal case for `while(true){}`) behaves
/// byte-identically to v1 and yields `SandboxError::Timeout`. The fallback
/// variant only surfaces when the abort drags past 5× the budget, which
/// indicates the interrupt could not fire at the budget boundary — e.g.
/// because execution was trapped inside a single C-level call. This is
/// observable telemetry, not a second kill switch: the interrupt still does
/// the aborting.
///
/// The value is generous on purpose. Small CI scheduling jitter on a 50 ms
/// budget (1× = 50 ms; 5× = 250 ms) must never trip the fallback for a
/// script that the interrupt aborted cleanly.
pub const WALLTIME_FALLBACK_MULTIPLIER_DEFAULT: u32 = 5;

/// QF1-E / SEC-01 — name of the environment variable that overrides the
/// wall-time fallback multiplier ([`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`]).
///
/// Only a value that parses as a `u32` of at least 2 is honoured. Anything
/// else (absent, empty, `"0"`, `"1"`, non-numeric) leaves the default in
/// place. The floor of 2 stops an operator from collapsing the fallback onto
/// the primary timeout boundary, which would re-classify every clean timeout
/// as a wall-time fallback and corrupt observability.
pub const ENV_WALLTIME_FALLBACK_MULTIPLIER: &str = "XFA_JS_WALLTIME_FALLBACK_MULTIPLIER";

/// QF1-E / SEC-01 — resolve the wall-time fallback multiplier from the
/// environment.
///
/// Returns the parsed override when [`ENV_WALLTIME_FALLBACK_MULTIPLIER`] is
/// set to a valid `u32` that is `>= 2`. Otherwise (variable absent or
/// unreadable, not a valid `u32`, or below the safety minimum) it returns
/// [`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`]. Out-of-range values are
/// discarded, not clamped, and no upper bound is applied.
///
/// The environment is read on every call; the backend snapshots the result
/// at construction time so per-test toggling is deterministic.
pub fn walltime_fallback_multiplier() -> u32 {
    match std::env::var(ENV_WALLTIME_FALLBACK_MULTIPLIER) {
        Ok(raw) => match raw.parse::<u32>() {
            // Accept the override only at or above the safety floor.
            Ok(parsed) if parsed >= 2 => parsed,
            _ => WALLTIME_FALLBACK_MULTIPLIER_DEFAULT,
        },
        Err(_) => WALLTIME_FALLBACK_MULTIPLIER_DEFAULT,
    }
}
398
/// Default memory budget per document, enforced by the rquickjs backend
/// (S-10): 32 MiB.
pub const DEFAULT_MEMORY_BUDGET_BYTES: usize = 32 << 20;

/// Hard cap on the size of a script body (S-11): 64 KiB. A body longer than
/// this is rejected before any parse attempt.
pub const MAX_SCRIPT_BODY_BYTES: usize = 64 << 10;

/// Hard cap on the size of a `<variables><script>` body (W2-B): 1 MiB.
///
/// Variables-scripts are form-level helper libraries (XFA 3.3 §5.5). They
/// hold the top-level `var` / `function` declarations that event scripts
/// call as `<scriptName>.<top_level_decl>(...)`. They run once per document
/// at registration time, under the same per-script time budget
/// ([`DEFAULT_TIME_BUDGET_MS`]) and the same per-document memory budget
/// ([`DEFAULT_MEMORY_BUDGET_BYTES`]) as event scripts.
///
/// Real-world government XFA forms (Canadian IRCC `imm5709e` / `imm5710e`,
/// Canadian Revenue Agency `t2200` / `t2-fill`) ship variables-scripts such
/// as `validateForm` (~125 KB), `CoreFunctions` (~115 KB) and `LOV`
/// (~507 KB), all well above the 64 KB event-script cap. If the event-script
/// cap were applied to them, registration would fail silently with
/// [`SandboxError::BodyTooLarge`] and dependent event scripts could not
/// resolve `validateForm.X()` / `CoreFunctions.X()`, surfacing as the W1-B
/// `implicit_function` cluster (impact 79 across 10 docs).
///
/// 1 MiB sits deliberately above the largest observed real-world body
/// (`LOV` ≈ 507 KB on `imm5710e`), so the cap stays a defence-in-depth stop
/// and never a routine failure path. The time and memory budgets still
/// bound runaway parse / execute cost.
pub const MAX_VARIABLES_SCRIPT_BODY_BYTES: usize = 1 << 20;
431
/// Activities the sandboxed runtime accepts for dispatch.
///
/// Anything else (`click`, `preSubmit`, `mouseEnter`, …) bypasses the
/// runtime entirely at the
/// [`crate::dynamic::apply_dynamic_scripts_with_mode`] boundary, because
/// those activities do not fire during a static flatten (S-14).
pub const SANDBOX_ACTIVITY_ALLOWLIST: &[&str] = &[
    "initialize",
    "calculate",
    "validate",
    "docReady",
    "layoutReady",
];

/// Returns `true` when `activity` names an entry of
/// [`SANDBOX_ACTIVITY_ALLOWLIST`]. `None` (no enclosing activity) is never
/// allowed, and the comparison is exact, so it is case-sensitive.
pub fn activity_allowed_for_sandbox(activity: Option<&str>) -> bool {
    match activity {
        Some(name) => SANDBOX_ACTIVITY_ALLOWLIST.contains(&name),
        None => false,
    }
}
448
/// **D1.B gated allow.** Name of the environment variable with which an
/// operator opts into the `preSave` dispatch path during flatten. The gate
/// is OFF by default; any value other than `"1"` keeps the W3-B closure
/// semantics (`preSave` denied at the dispatch gate AND at the host-binding
/// gate).
///
/// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
///
/// **Stop-rules (enforced by tests, never by code):**
/// 1. Only `preSave` is affected. `preSubmit`, `click`, and every other
///    denylist activity stay denied whatever this flag says.
/// 2. Default-OFF. Behaviour is byte-identical to v4 (W3-B closure) when the
///    variable is absent or unset, or holds any value other than `"1"`.
/// 3. Flipping requires an operator-signed waiver per the policy doc. The
///    flag is read at dispatch (one snapshot per flatten) and never globally
///    memoised, so test harnesses can toggle it per-test.
pub const ENV_PRESAVE_DURING_FLATTEN: &str = "XFA_PRESAVE_DURING_FLATTEN";

/// Returns `true` only when `XFA_PRESAVE_DURING_FLATTEN` is exactly `"1"`.
///
/// Every other state (absent, empty, `"0"`, `"true"`, `"yes"`, casing
/// variants) returns `false`. This is the single place that consults the
/// environment for the D1.B gate; every other site takes a `bool` argument
/// so tests can toggle it deterministically.
pub fn presave_during_flatten_enabled() -> bool {
    matches!(std::env::var(ENV_PRESAVE_DURING_FLATTEN), Ok(value) if value == "1")
}
473
474/// **D1.B gated allow.** Same as [`activity_allowed_for_sandbox`], but
475/// also accepts `Some("preSave")` when `presave_gate` is true.
476///
477/// `presave_gate` is computed once per flatten (via
478/// [`presave_during_flatten_enabled`]) and threaded down so the dispatch
479/// path can decide deterministically per script; the host-binding layer
480/// receives the same bool via [`HostBindings::set_presave_gate`].
481///
482/// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
483pub fn activity_allowed_for_sandbox_with_gate(activity: Option<&str>, presave_gate: bool) -> bool {
484 if activity_allowed_for_sandbox(activity) {
485 return true;
486 }
487 presave_gate && matches!(activity, Some("preSave"))
488}
489
490/// The host-side adapter the dispatch path calls. A minimal contract
491/// chosen so that swapping backends (rquickjs ↔ boa ↔ external sandbox)
492/// is one Cargo feature flag away.
493pub trait XfaJsRuntime {
494 /// One-time initialisation. Idempotent.
495 fn init(&mut self) -> Result<(), SandboxError>;
496
497 /// Reset per-document state (memory budget, instruction counter,
498 /// any cached compiled scripts). Called once per flatten.
499 fn reset_for_new_document(&mut self) -> Result<(), SandboxError>;
500
501 /// Phase C: install the `FormTree` the runtime should resolve paths
502 /// against and mutate. The dispatch path owns the mutable borrow and clears
503 /// the handle before returning.
504 fn set_form_handle(
505 &mut self,
506 _form: *mut FormTree,
507 _root_id: FormNodeId,
508 ) -> Result<(), SandboxError> {
509 Ok(())
510 }
511
512 /// Phase D-γ: install a read-only view of the `DataDom` for the current
513 /// document. Called once per document after `set_form_handle`, before any
514 /// scripts run. Default: no-op (backends without DataDom support ignore it).
515 ///
516 /// # Safety
517 /// Callers **must** guarantee that `dom` outlives all script execution for
518 /// this document (i.e. it must remain alive until `set_form_handle(null)`
519 /// is called). The runtime stores the pointer read-only and never writes
520 /// through it.
521 fn set_data_handle(&mut self, _dom: *const DataDom) {}
522
523 /// BE-1 tranche #1 (benign zero-instance SOM): install the set of
524 /// template-declared container names (`subform`/`subformSet`/`exclGroup`/
525 /// `area`) for the current document. Backends that resolve implicit SOM
526 /// identifiers use it to return a benign empty-node façade for a
527 /// declared-but-absent reference instead of `undefined` (Adobe semantics),
528 /// so guarded scripts (`if (!Sub.Child.isNull) {...} else {...}`) run their
529 /// else branch instead of throwing. Like [`set_data_handle`], the caller
530 /// installs this before script execution. Default: no-op (the static
531 /// `NullRuntime` ignores it, so the default/non-sandboxed path is
532 /// unaffected and stays byte-identical).
533 ///
534 /// [`set_data_handle`]: XfaJsRuntime::set_data_handle
535 fn set_declared_subform_names(&mut self, _names: std::collections::HashSet<String>) {}
536
537 /// Phase C: reset per-script host counters and install the current script
538 /// context node / activity. Backends without host bindings ignore it.
539 fn reset_per_script(
540 &mut self,
541 _current_id: FormNodeId,
542 _activity: Option<&str>,
543 ) -> Result<(), SandboxError> {
544 Ok(())
545 }
546
547 /// Phase C page-count foundation. The current flatten order runs scripts
548 /// before layout, so callers normally leave this at 0.
549 fn set_static_page_count(&mut self, _page_count: u32) -> Result<(), SandboxError> {
550 Ok(())
551 }
552
553 /// **D1.B gated allow.** Inform the runtime whether the
554 /// `XFA_PRESAVE_DURING_FLATTEN=1` opt-in is active for the current
555 /// flatten. The dispatch path computes this once per document via
556 /// [`presave_during_flatten_enabled`] and forwards it here so the host
557 /// binding layer can mirror the dispatch decision (defence-in-depth).
558 ///
559 /// Default: no-op. Backends without a host-binding gate ignore it.
560 /// The contract for backends that DO mirror the gate:
561 ///
562 /// 1. Default OFF: every call to [`HostBindings::write_activity_allowed`]
563 /// with `current_activity = Some("preSave")` MUST return false.
564 /// 2. Gate ON: the same call MUST return true ONLY for `Some("preSave")`.
565 /// Every other denylist activity (`preSubmit`, `click`, …) MUST
566 /// continue to return false.
567 ///
568 /// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
569 fn set_presave_gate(&mut self, _enabled: bool) {}
570
571 /// Execute one script body inside the sandbox.
572 ///
573 /// `activity` is the enclosing `<event activity="...">` value if
574 /// any. The dispatch site has already filtered against
575 /// [`activity_allowed_for_sandbox`]; backends may treat unknown
576 /// activities as `PhaseDenied` for defence-in-depth.
577 fn execute_script(
578 &mut self,
579 activity: Option<&str>,
580 body: &str,
581 ) -> Result<RuntimeOutcome, SandboxError>;
582
583 /// Take the cumulative metadata since the last `take_metadata`
584 /// call (or since `reset_for_new_document`, whichever was later).
585 fn take_metadata(&mut self) -> RuntimeMetadata;
586
587 /// D6: drain captured `occur.min`/`occur.max` write intents
588 /// `(node_index, prop, value)` recorded during the script pass. The
589 /// dispatch path applies them (only when `XFA_OCCUR_APPLY=1`) after the
590 /// rollback decision. Default: none (non-sandboxed runtimes capture nothing).
591 fn take_occur_mutations(&mut self) -> Vec<(usize, String, i64)> {
592 Vec::new()
593 }
594
595 /// Epic A E-2/E-3: drain verbose per-entry diagnostic logs. Only
596 /// populated when `XFA_RUNTIME_DIAG=1` is set at the host call sites.
597 /// Default impl returns empty logs (NullRuntime, non-sandboxed paths).
598 fn take_diag_logs(&mut self) -> RuntimeDiagLogs {
599 RuntimeDiagLogs::default()
600 }
601}
602
#[cfg(test)]
mod tests {
    use super::*;

    /// Each of the five lifecycle activities passes the base allowlist check.
    #[test]
    fn allowlist_accepts_initialize_and_calculate() {
        let lifecycle = [
            "initialize",
            "calculate",
            "validate",
            "docReady",
            "layoutReady",
        ];
        for name in lifecycle {
            assert!(activity_allowed_for_sandbox(Some(name)));
        }
    }

    // D1.B gate: with the gate bool OFF, `activity_allowed_for_sandbox_with_gate`
    // is byte-identical to `activity_allowed_for_sandbox`. With the gate bool
    // wired ON, ONLY `preSave` flips to allowed — `preSubmit`, `click`, etc.
    // stay denied (hard stop in §6.1 of the policy doc).
    #[test]
    fn presave_gate_off_matches_base_allowlist() {
        assert!(SANDBOX_ACTIVITY_ALLOWLIST
            .iter()
            .copied()
            .all(|name| activity_allowed_for_sandbox_with_gate(Some(name), false)));

        let must_be_denied = [
            "preSave",
            "preSubmit",
            "click",
            "mouseEnter",
            "exit",
            "postSave",
        ];
        for name in must_be_denied {
            assert!(!activity_allowed_for_sandbox_with_gate(Some(name), false));
        }
        assert!(!activity_allowed_for_sandbox_with_gate(None, false));
    }

    #[test]
    fn presave_gate_on_unlocks_only_presave() {
        // With the gate ON, preSave goes from deny to allow.
        let presave_allowed = activity_allowed_for_sandbox_with_gate(Some("preSave"), true);
        assert!(presave_allowed);

        // Hard-stop: every other denylist activity MUST stay denied.
        let denylist = [
            "preSubmit",
            "click",
            "mouseEnter",
            "mouseExit",
            "exit",
            "enter",
            "change",
            "postSave",
            "postSubmit",
            "ready",
            "prePrint",
            "postPrint",
            "preOpen",
            "full",
        ];
        for still_denied in denylist {
            let allowed = activity_allowed_for_sandbox_with_gate(Some(still_denied), true);
            assert!(
                !allowed,
                "{still_denied} must stay denied even with D1.B gate ON",
            );
        }
        assert!(!activity_allowed_for_sandbox_with_gate(None, true));

        // The 5 lifecycle activities are allowed exactly as they are with
        // the gate OFF.
        assert!(SANDBOX_ACTIVITY_ALLOWLIST
            .iter()
            .copied()
            .all(|name| activity_allowed_for_sandbox_with_gate(Some(name), true)));
    }

    // Note: the env-var helper `presave_during_flatten_enabled` is pinned by
    // integration tests in `tests/m3b_phasePQ_presave_gated_w3repair_d1b.rs`
    // (`d1b_default_off_keeps_presave_denied_at_dispatch`,
    // `d1b_env_var_parsing_only_one_enables_gate`). Inline unit tests would
    // race with `std::env` because cargo test runs in-process; the
    // integration tests serialise env mutations behind a mutex guard.
    //
    // The ENV_PRESAVE_DURING_FLATTEN constant is canonicalised at the const
    // declaration site and never re-spelled in code.

    #[test]
    fn presave_env_var_constant_is_canonical_name() {
        assert_eq!(ENV_PRESAVE_DURING_FLATTEN, "XFA_PRESAVE_DURING_FLATTEN");
    }

    #[test]
    fn allowlist_rejects_ui_and_submit_activities() {
        let ui_and_submit = [
            "click",
            "mouseEnter",
            "mouseExit",
            "enter",
            "exit",
            "preSubmit",
            "postSubmit",
            "ready",
        ];
        for ui in ui_and_submit {
            let allowed = activity_allowed_for_sandbox(Some(ui));
            assert!(!allowed, "{ui} must not be allowed");
        }
        assert!(!activity_allowed_for_sandbox(None));
    }

    #[test]
    fn metadata_is_clean_when_zero() {
        let zeroed = RuntimeMetadata::default();
        assert!(zeroed.is_clean());

        let mut meta = RuntimeMetadata {
            executed: 5,
            ..Default::default()
        };
        assert!(
            meta.is_clean(),
            "executed counter does not flip cleanliness"
        );

        meta.runtime_errors = 1;
        assert!(!meta.is_clean());
    }

    #[test]
    // These assertions deliberately pin the safety floors of compile-time
    // budget constants, so `assertions_on_constants` is expected and wanted
    // here.
    #[allow(clippy::assertions_on_constants)]
    fn budget_constants_are_sane() {
        assert!(MAX_SCRIPT_BODY_BYTES >= 4096);
        assert!(DEFAULT_TIME_BUDGET_MS >= 25);
        assert!(DEFAULT_MEMORY_BUDGET_BYTES >= 1024 * 1024);
    }

    // QF1-E: the wall-time fallback multiplier must be strictly > 1 so that a
    // clean interrupt at the budget boundary cannot be reclassified as
    // WallTimeExceeded. 5 is the documented default; this pins it as a
    // contract between this module and the backend. A compile-time
    // const-assert avoids `clippy::assertions_on_constants` while still
    // failing the build if the safety floor is broken.
    const _WALLTIME_SAFE_MIN: () = assert!(
        WALLTIME_FALLBACK_MULTIPLIER_DEFAULT >= 2,
        "multiplier < 2 would re-label normal Timeouts as WallTimeExceeded"
    );

    #[test]
    fn walltime_fallback_multiplier_default_is_safe() {
        // The compile-time `_WALLTIME_SAFE_MIN` const above is the real
        // contract. This runtime test pins the concrete value so that a
        // refactor which changes the default but keeps the floor intact is
        // still caught.
        assert_eq!(WALLTIME_FALLBACK_MULTIPLIER_DEFAULT, 5);
    }

    #[test]
    fn walltime_fallback_env_var_name_is_canonical() {
        let expected = "XFA_JS_WALLTIME_FALLBACK_MULTIPLIER";
        assert_eq!(ENV_WALLTIME_FALLBACK_MULTIPLIER, expected);
    }

    // W2-B: the variables-script body cap must be strictly higher than the
    // event-script cap so XFA helper libraries (validateForm, LOV,
    // CoreFunctions) register. It must still be bounded so an oversize body
    // cannot bypass static defence-in-depth before the per-document memory
    // budget engages.
    #[test]
    // Intentional const-floor contract assertions on compile-time caps.
    #[allow(clippy::assertions_on_constants)]
    fn variables_script_cap_is_above_event_cap_and_bounded() {
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES > MAX_SCRIPT_BODY_BYTES,
            "variables-script cap must exceed event-script cap"
        );
        // Sanity: high enough to register the largest observed real-world
        // variables-script library (LOV ≈ 507 KB).
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES >= 768 * 1024,
            "variables-script cap below observed real-world max"
        );
        // Sanity: bounded well under the per-document memory budget so a
        // single oversize body cannot consume the entire budget on parse
        // alone.
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES <= DEFAULT_MEMORY_BUDGET_BYTES / 8,
            "variables-script cap must stay an order of magnitude below memory budget"
        );
    }
}