pdf_xfa/js_runtime/
mod.rs

1//! M3-B Phase B — JavaScript runtime adapter (skeleton).
2//!
3//! This module is the integration boundary between `crates/pdf-xfa`'s
4//! flatten pipeline and a sandboxed JavaScript runtime. Phase B ships
5//! the boundary plus a `NullRuntime` stub. The rquickjs-backed runtime
6//! is gated behind the `xfa-js-sandboxed` Cargo feature and registers
7//! **no host bindings** — Phase C adds the first useful set per
8//! `benchmarks/runs/M3B_HOST_BINDINGS_MINIMUM_SET.md`.
9//!
10//! The default behaviour of [`crate::dynamic::apply_dynamic_scripts`]
11//! and `flatten_xfa_to_pdf` is unchanged: with the feature off and
12//! mode `BestEffortStatic`, the runtime is never invoked. Adding the
13//! adapter is intentionally behaviour-neutral.
14//!
15//! See `benchmarks/runs/M3B_RUNTIME_SECURITY_MODEL.md` for the 18
16//! invariants the adapter must respect (S-1..S-18).
17
18pub mod host;
19pub mod null;
20pub mod regex_guard;
21
22#[cfg(feature = "xfa-js-sandboxed")]
23pub mod rquickjs_backend;
24
25pub use host::{
26    HostBindings, MutationLogEntry, MAX_INSTANCES_PER_SUBFORM, MAX_ITEMS_PER_LISTBOX,
27    MAX_MUTATIONS_PER_DOC, MAX_RESOLVE_CALLS_PER_SCRIPT, MAX_RESOLVE_RESULTS, MAX_SOM_DEPTH,
28};
29pub use null::NullRuntime;
30#[cfg(feature = "xfa-js-sandboxed")]
31pub use rquickjs_backend::QuickJsRuntime;
32
33use xfa_dom_resolver::data_dom::DataDom;
34use xfa_layout_engine::form::{FormNodeId, FormTree};
35
/// Result of running a single script body through the sandbox.
#[derive(Clone, Debug, Default)]
pub struct RuntimeOutcome {
    /// `true` if the script ran to completion inside the sandbox.
    pub executed: bool,
    /// How many host-tree mutations the script applied through the
    /// host-binding allowlist. That allowlist is currently empty, so the
    /// value is always 0 in Phase B.
    pub mutated_field_count: usize,
}
45
46/// Errors the runtime adapter can emit. Every variant is recoverable
47/// at the dispatch site — the parent flatten never aborts because of
48/// a sandbox error (S-17 fail-open).
49#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
50pub enum SandboxError {
51    /// Cargo feature `xfa-js-sandboxed` not compiled in. Returned by
52    /// the [`null::NullRuntime`] for every `execute_script` call.
53    #[error("sandboxed runtime not compiled in")]
54    NotCompiledIn,
55
56    /// Script body exceeds the per-script size cap (S-11; default 64 KB).
57    #[error("script body exceeds size cap")]
58    BodyTooLarge,
59
60    /// Per-script time budget exceeded (S-9; default 100 ms hard).
61    #[error("script time budget exceeded")]
62    Timeout,
63
64    /// Per-document memory budget exceeded (S-10; default 32 MiB hard).
65    #[error("document memory budget exceeded")]
66    OutOfMemory,
67
68    /// Call stack depth exceeded the configured maximum (S-12; default 64).
69    #[error("call stack overflow")]
70    StackOverflow,
71
72    /// Activity not in the runtime allowlist for sandboxed dispatch
73    /// (S-14). UI / submission activities skip the runtime entirely
74    /// at the dispatch boundary and never reach this error path; this
75    /// variant exists for explicit binding-level phase guards.
76    #[error("activity {0:?} denied for sandbox dispatch")]
77    PhaseDenied(String),
78
79    /// Phase B: no host bindings registered. Returned when a script
80    /// attempts to read or write any `xfa.*` / `field.*` binding the
81    /// adapter has not yet exposed.
82    #[error("no host bindings registered (Phase B skeleton)")]
83    NoBindings,
84
85    /// FFI panic captured via `std::panic::catch_unwind`. Used by the
86    /// rquickjs backend to keep panics from crossing the FFI boundary
87    /// into the Rust caller.
88    #[error("sandbox panic captured: {0}")]
89    PanicCaptured(String),
90
91    /// Generic script-level error: parse, runtime, or thrown JS error.
92    #[error("script error: {0}")]
93    ScriptError(String),
94
95    /// W3-A — REDOS-01 mitigation: the script body contains a regex
96    /// pattern shape known to cause catastrophic backtracking in QuickJS's
97    /// NFA-based engine (e.g. `(a+)+$`). The body is rejected before
98    /// reaching the sandbox to bound CPU time. See
99    /// `crates/pdf-xfa/src/js_runtime/regex_guard.rs` for the heuristic
100    /// catalogue.
101    #[error("regex rejected by ReDoS guard: {0}")]
102    RegexRejected(String),
103
104    /// QF1-E / SEC-01 — defence-in-depth wall-time fallback. The primary
105    /// per-script time budget is enforced via the rquickjs interrupt
106    /// handler that polls at JS opcode boundaries (see
107    /// [`SandboxError::Timeout`]). When the interrupt callback fails to
108    /// fire for an extended period — for example because execution is
109    /// trapped inside a single C-level call (regex, JSON.parse on a
110    /// pathological input, host binding routine) that does not yield
111    /// opcode boundaries — the wall-clock can drift well beyond the
112    /// configured budget. This variant is emitted when total elapsed
113    /// time crossed the fallback threshold
114    /// ([`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`] × the configured
115    /// time budget). It is strictly a *post-hoc classification*:
116    /// the primary interrupt path remains in charge of actually aborting
117    /// script execution; this variant simply re-labels the error so
118    /// observability can distinguish "clean stop at budget" from
119    /// "stop dragged past 5×". See
120    /// `crates/pdf-xfa/src/js_runtime/rquickjs_backend.rs` and the
121    /// QF1_E report under `benchmarks/runs/xfa_enterprise_plan/quality_factory_v1/`.
122    #[error("wall-time fallback fired: elapsed {0}")]
123    WallTimeExceeded(String),
124}
125
/// Counters gathered over the flatten of one document.
///
/// The runtime adapter adds to them on every call; once the document is
/// finished, the dispatch site collects them through
/// [`XfaJsRuntime::take_metadata`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct RuntimeMetadata {
    /// Number of scripts that ran to completion inside the sandbox.
    pub executed: usize,
    /// Number of generic runtime / script errors (parse, throw, NoBindings, …).
    pub runtime_errors: usize,
    /// Number of times the time budget was exhausted.
    pub timeouts: usize,
    /// Number of times the memory budget was exhausted.
    pub oom: usize,
    /// Phase C: host-binding invocations.
    pub host_calls: usize,
    /// Phase C: `field.rawValue` writes that succeeded.
    pub mutations: usize,
    /// Phase D: instanceManager structure writes that succeeded.
    pub instance_writes: usize,
    /// Phase D-β: listbox clearItems / addItem writes that succeeded.
    pub list_writes: usize,
    /// Phase C: failures at the binding level (type, activity, cap, parse).
    pub binding_errors: usize,
    /// Phase C: SOM resolutions that missed or failed.
    pub resolve_failures: usize,
    /// Phase D-γ: DataDom reads that succeeded (children / value /
    /// child-by-name).
    pub data_reads: usize,
    /// Phase E (XFA-JS-HOST-STUBS): calls into host capabilities that need
    /// genuine viewer / user interaction (UI dialogs, signature panels,
    /// network submit).
    ///
    /// A non-interactive flatten cannot honestly satisfy such calls. Rather
    /// than raising a `TypeError` (which would abort the script and inflate
    /// [`runtime_errors`](Self::runtime_errors)), the stubs return a safe
    /// default value and bump this counter, so the dispatch site still sees
    /// the "would-have-been-interactive" touch points.
    ///
    /// The counter is intentionally NOT part of [`is_clean`](Self::is_clean):
    /// a script that touched `xfa.host.messageBox` still counts as a clean
    /// run because the sandbox did not error. Embedders that care about UI
    /// gaps should read this field explicitly.
    pub unsupported_host_calls: usize,
    /// Phase D-θ.2: probe calls skipped because `parentIds.length == 1 &&
    /// chain.length == 1`, i.e. no same-name ambiguity is possible. Each
    /// skip saves one `resolveWithFullChainStrict` host round-trip.
    pub probe_skips: usize,
    /// D3 (trace-only): `<variables>` `<script>` objects gathered from the
    /// template for this document (root + subform scopes).
    pub variables_scripts_collected: usize,
    /// D3 (trace-only): `<variables>` `<text>` data items gathered.
    pub variables_data_items_collected: usize,
    /// D3 (trace-only): script objects for which JS-side registration
    /// reported success (namespace bound into `variablesScripts` /
    /// `subformVariables`).
    pub script_objects_registered: usize,
    /// D3 (trace-only): script objects that failed to register, through
    /// either a Rust-side skip (`BodyTooLarge` / `RegexRejected` / panic) or
    /// a JS-side eval failure (`setVariablesScript` returned `false`).
    /// Observability only; it never feeds into [`is_clean`](Self::is_clean)
    /// or rollback.
    pub script_objects_register_failed: usize,
    /// D3 (trace-only): script objects gathered under a NESTED subform
    /// scope. They are registered into `subformVariables` only and so are
    /// not reachable as a bare identifier today (the "scope_hidden" gap
    /// class).
    pub script_objects_subform_scoped: usize,
    /// D4: all SOM lookups seen at the host resolve boundary (successes +
    /// failures across the instrumented `resolve_*` entry points).
    pub som_lookups_total: usize,
    /// D4: SOM lookups that resolved to one or more nodes.
    pub som_lookup_successes: usize,
    /// D4: SOM lookups that came back as NoMatch.
    pub som_lookup_failures: usize,
    /// D4: subform-scoped script-object names that were NOT exposed because
    /// ≥2 subforms declare the same name (fail-closed ambiguity).
    pub som_lookup_ambiguous: usize,
    /// D4: subform-scoped script objects made available to bare-identifier
    /// lookup (unique-name, sandboxed-only).
    pub som_subform_scripts_exposed: usize,
    /// D4 (trace-only): SOM NoMatch references whose path is an `occur` path
    /// (`occur` / `occur.min` / `occur.max` …). These are classified, NOT
    /// resolved.
    pub som_occur_path_refs: usize,
    /// D5: accesses to a `node.occur` handle (successes + failures).
    pub occur_lookups_total: usize,
    /// D5: `node.occur` accesses made while the node handle was live.
    pub occur_lookup_successes: usize,
    /// D5: `node.occur` accesses made while the node handle was not live.
    pub occur_lookup_failures: usize,
    /// D5: reads of an `occur` property (`min`/`max`/`initial`).
    pub occur_property_reads: usize,
    /// D5: writes to an `occur` property (captured, not applied).
    pub occur_property_writes: usize,
    /// D5: the subset of writes that targeted `occur.min`.
    pub occur_min_writes: usize,
    /// D5: the subset of writes that targeted `occur.max`.
    pub occur_max_writes: usize,
    /// D5: occur mutations recorded as intent (no layout effect).
    pub occur_mutations_captured: usize,
    /// D5: occur mutations APPLIED to layout. **Always 0 in D5**
    /// (capture-only); D6 bumps it when `XFA_OCCUR_APPLY=1` applies a
    /// captured `occur.min`.
    pub occur_mutations_applied: usize,
    /// D6: captured occur mutations that were NOT applied (rollback,
    /// apply-flag off, dead/non-repeatable target, unsupported prop,
    /// negative value).
    pub occur_mutations_skipped: usize,
    /// D6: captured occur mutations skipped because the target node is not a
    /// repeatable container (Subform/Area/ExclGroup) — fail-closed.
    pub occur_application_ambiguous: usize,
    /// D6: distinct form nodes whose occur was applied.
    pub occur_application_targets: usize,
    /// BE-1: `$data` bare-global intercepts that resolved successfully
    /// (JS layer).
    pub som_data_root_hits: usize,
    /// BE-1: `#items` property accesses that resolved to a non-empty item
    /// list.
    pub som_items_path_hits: usize,
}
235
236impl RuntimeMetadata {
237    /// True when the runtime never reported any error class.
238    pub fn is_clean(&self) -> bool {
239        self.runtime_errors == 0
240            && self.timeouts == 0
241            && self.oom == 0
242            && self.binding_errors == 0
243            && self.resolve_failures == 0
244    }
245
246    /// Add another metadata snapshot into this one.
247    pub fn accumulate(&mut self, other: RuntimeMetadata) {
248        self.executed = self.executed.saturating_add(other.executed);
249        self.runtime_errors = self.runtime_errors.saturating_add(other.runtime_errors);
250        self.timeouts = self.timeouts.saturating_add(other.timeouts);
251        self.oom = self.oom.saturating_add(other.oom);
252        self.host_calls = self.host_calls.saturating_add(other.host_calls);
253        self.mutations = self.mutations.saturating_add(other.mutations);
254        self.instance_writes = self.instance_writes.saturating_add(other.instance_writes);
255        self.list_writes = self.list_writes.saturating_add(other.list_writes);
256        self.binding_errors = self.binding_errors.saturating_add(other.binding_errors);
257        self.resolve_failures = self.resolve_failures.saturating_add(other.resolve_failures);
258        self.data_reads = self.data_reads.saturating_add(other.data_reads);
259        self.unsupported_host_calls = self
260            .unsupported_host_calls
261            .saturating_add(other.unsupported_host_calls);
262        self.probe_skips = self.probe_skips.saturating_add(other.probe_skips);
263        self.variables_scripts_collected = self
264            .variables_scripts_collected
265            .saturating_add(other.variables_scripts_collected);
266        self.variables_data_items_collected = self
267            .variables_data_items_collected
268            .saturating_add(other.variables_data_items_collected);
269        self.script_objects_registered = self
270            .script_objects_registered
271            .saturating_add(other.script_objects_registered);
272        self.script_objects_register_failed = self
273            .script_objects_register_failed
274            .saturating_add(other.script_objects_register_failed);
275        self.script_objects_subform_scoped = self
276            .script_objects_subform_scoped
277            .saturating_add(other.script_objects_subform_scoped);
278        self.som_lookups_total = self
279            .som_lookups_total
280            .saturating_add(other.som_lookups_total);
281        self.som_lookup_successes = self
282            .som_lookup_successes
283            .saturating_add(other.som_lookup_successes);
284        self.som_lookup_failures = self
285            .som_lookup_failures
286            .saturating_add(other.som_lookup_failures);
287        self.som_lookup_ambiguous = self
288            .som_lookup_ambiguous
289            .saturating_add(other.som_lookup_ambiguous);
290        self.som_subform_scripts_exposed = self
291            .som_subform_scripts_exposed
292            .saturating_add(other.som_subform_scripts_exposed);
293        self.som_occur_path_refs = self
294            .som_occur_path_refs
295            .saturating_add(other.som_occur_path_refs);
296        self.occur_lookups_total = self
297            .occur_lookups_total
298            .saturating_add(other.occur_lookups_total);
299        self.occur_lookup_successes = self
300            .occur_lookup_successes
301            .saturating_add(other.occur_lookup_successes);
302        self.occur_lookup_failures = self
303            .occur_lookup_failures
304            .saturating_add(other.occur_lookup_failures);
305        self.occur_property_reads = self
306            .occur_property_reads
307            .saturating_add(other.occur_property_reads);
308        self.occur_property_writes = self
309            .occur_property_writes
310            .saturating_add(other.occur_property_writes);
311        self.occur_min_writes = self.occur_min_writes.saturating_add(other.occur_min_writes);
312        self.occur_max_writes = self.occur_max_writes.saturating_add(other.occur_max_writes);
313        self.occur_mutations_captured = self
314            .occur_mutations_captured
315            .saturating_add(other.occur_mutations_captured);
316        self.occur_mutations_applied = self
317            .occur_mutations_applied
318            .saturating_add(other.occur_mutations_applied);
319        self.occur_mutations_skipped = self
320            .occur_mutations_skipped
321            .saturating_add(other.occur_mutations_skipped);
322        self.occur_application_ambiguous = self
323            .occur_application_ambiguous
324            .saturating_add(other.occur_application_ambiguous);
325        self.occur_application_targets = self
326            .occur_application_targets
327            .saturating_add(other.occur_application_targets);
328        self.som_data_root_hits = self
329            .som_data_root_hits
330            .saturating_add(other.som_data_root_hits);
331        self.som_items_path_hits = self
332            .som_items_path_hits
333            .saturating_add(other.som_items_path_hits);
334    }
335}
336
337/// Epic A E-2/E-3: verbose per-entry diagnostic logs drained alongside
338/// [`RuntimeMetadata`] from [`HostBindings`].  Empty unless
339/// `XFA_RUNTIME_DIAG=1` is set at the HostBindings call sites.
340#[derive(Debug, Default)]
341pub struct RuntimeDiagLogs {
342    /// E-2: SOM resolution misses (capped at 200).
343    pub som_fail_log: Vec<crate::dynamic::SomFailEntry>,
344    /// E-3: instanceManager write events (capped at 200).
345    pub instance_write_log: Vec<crate::dynamic::InstanceWriteEntry>,
346}
347
/// Per-script wall-clock budget, in milliseconds, that the rquickjs backend
/// enforces by default (S-9). It is a constant here so that tests can reason
/// about the budget without depending on the runtime backend module.
pub const DEFAULT_TIME_BUDGET_MS: u64 = 100;

/// QF1-E / SEC-01 — default multiplier of the worker-level wall-time
/// fallback.
///
/// Timeouts are primarily enforced by the rquickjs interrupt callback, which
/// is polled at JS opcode boundaries. If a script's elapsed wall-clock time
/// goes beyond this multiplier × the configured time budget, the backend
/// labels the resulting error [`SandboxError::WallTimeExceeded`] rather than
/// [`SandboxError::Timeout`].
///
/// With the default of `5`, an interrupt callback that fires within ≤ 5× the
/// configured budget (the normal case for `while(true){}`) leaves behaviour
/// byte-identical to v1: `SandboxError::Timeout` is emitted.
///
/// The fallback variant only surfaces when the abort drags past 5× the
/// budget, which indicates that the primary interrupt mechanism could not
/// fire at the budget boundary — e.g. because execution was trapped inside a
/// single C-level call. It is observable telemetry, not a new kill switch:
/// the actual aborting is still done by the interrupt.
///
/// The multiplier is deliberately generous, so that small CI scheduling
/// jitter on a 50 ms budget (1× = 50 ms; 5× = 250 ms) never accidentally
/// trips the fallback for a script the interrupt aborted cleanly.
pub const WALLTIME_FALLBACK_MULTIPLIER_DEFAULT: u32 = 5;

/// QF1-E / SEC-01 — name of the environment variable that overrides the
/// wall-time fallback multiplier ([`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`]).
///
/// An override is honoured only if it parses as a `u32` of at least 2; in
/// every other case (absent, empty, `"0"`, `"1"`, non-numeric) the default
/// stays in effect. The floor of 2 keeps an operator from accidentally
/// collapsing the fallback onto the primary timeout boundary, which would
/// re-classify every clean timeout as a wall-time fallback and corrupt
/// observability.
pub const ENV_WALLTIME_FALLBACK_MULTIPLIER: &str = "XFA_JS_WALLTIME_FALLBACK_MULTIPLIER";
383
384/// QF1-E / SEC-01 — read the wall-time fallback multiplier from the
385/// environment, clamped to a sane range. Returns
386/// [`WALLTIME_FALLBACK_MULTIPLIER_DEFAULT`] when the env var is absent,
387/// not a valid `u32`, or below the safety minimum of `2`.
388///
389/// Reads the env var once per call; the backend snapshots the result at
390/// construction time so per-test toggling is deterministic.
391pub fn walltime_fallback_multiplier() -> u32 {
392    std::env::var(ENV_WALLTIME_FALLBACK_MULTIPLIER)
393        .ok()
394        .and_then(|s| s.parse::<u32>().ok())
395        .filter(|&n| n >= 2)
396        .unwrap_or(WALLTIME_FALLBACK_MULTIPLIER_DEFAULT)
397}
398
/// Per-document memory budget, in bytes, that the rquickjs backend enforces
/// by default (S-10): 32 MiB.
pub const DEFAULT_MEMORY_BUDGET_BYTES: usize = 32 * 1024 * 1024;

/// Hard upper limit on the size of a script body (S-11): 64 KiB. A body
/// longer than this is rejected before any attempt to parse it.
pub const MAX_SCRIPT_BODY_BYTES: usize = 64 * 1024;

/// Hard upper limit on the size of a `<variables><script>` body (W2-B):
/// 1 MiB.
///
/// Variables-scripts are form-level helper libraries (XFA 3.3 §5.5). They
/// carry the top-level `var` / `function` declarations that event scripts
/// invoke as `<scriptName>.<top_level_decl>(...)`. They run once per
/// document, at registration time, and are subject to the same per-script
/// time budget ([`DEFAULT_TIME_BUDGET_MS`]) and the same per-document memory
/// budget ([`DEFAULT_MEMORY_BUDGET_BYTES`]) as event scripts.
///
/// Real-world government XFA forms (Canadian IRCC `imm5709e` / `imm5710e`,
/// Canadian Revenue Agency `t2200` / `t2-fill`) ship variables-scripts such
/// as `validateForm` (~125 KB), `CoreFunctions` (~115 KB) and `LOV`
/// (~507 KB), all well above the 64 KB event-script cap. If that cap were
/// applied to variables-scripts, registration would fail silently with
/// [`SandboxError::BodyTooLarge`], and the dependent event scripts could not
/// resolve `validateForm.X()` / `CoreFunctions.X()`; this surfaces as the
/// W1-B `implicit_function` cluster (impact 79 across 10 docs).
///
/// 1 MiB is deliberately larger than the biggest body observed in the wild
/// (`LOV` ≈ 507 KB on `imm5710e`), so that the cap stays a defence-in-depth
/// stop and never becomes a routine failure path. Runaway parse / execute
/// cost is still bounded by the time and memory budgets.
pub const MAX_VARIABLES_SCRIPT_BODY_BYTES: usize = 1024 * 1024;
431
/// Activities that the sandboxed runtime accepts for dispatch.
///
/// Every other activity (`click`, `preSubmit`, `mouseEnter`, …) bypasses the
/// runtime altogether at the
/// [`crate::dynamic::apply_dynamic_scripts_with_mode`] boundary, because
/// such activities do not fire during a static flatten (S-14).
pub const SANDBOX_ACTIVITY_ALLOWLIST: &[&str] = &[
    "initialize",
    "calculate",
    "validate",
    "docReady",
    "layoutReady",
];
443
444/// True when `activity` is in [`SANDBOX_ACTIVITY_ALLOWLIST`].
445pub fn activity_allowed_for_sandbox(activity: Option<&str>) -> bool {
446    matches!(activity, Some(a) if SANDBOX_ACTIVITY_ALLOWLIST.contains(&a))
447}
448
/// **D1.B gated allow.** Name of the environment variable with which an
/// operator opts into the `preSave` dispatch path during flatten. The gate
/// is OFF by default; every value other than `"1"` keeps the W3-B closure
/// semantics (`preSave` is denied at the dispatch gate AND at the
/// host-binding gate).
///
/// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
///
/// **Stop-rules (enforced by tests, never by code):**
/// 1. The flag affects `preSave` only. `preSubmit`, `click`, and all other
///    denylist activities remain denied whatever the flag says.
/// 2. Default-OFF. When the variable is absent or unset, or holds any value
///    other than `"1"`, behaviour is byte-identical to v4 (W3-B closure).
/// 3. Flipping the flag requires an operator-signed waiver per the policy
///    doc. The flag is read at dispatch (one snapshot per flatten) and is
///    never memoised globally, so test harnesses can toggle it per-test.
pub const ENV_PRESAVE_DURING_FLATTEN: &str = "XFA_PRESAVE_DURING_FLATTEN";
465
466/// True when `XFA_PRESAVE_DURING_FLATTEN=1`. Any other value (absent, empty,
467/// `"0"`, `"true"`, `"yes"`, casing variants) returns false. This is the
468/// only place that reads the environment for the D1.B gate — every other
469/// site receives a `bool` argument so tests can toggle deterministically.
470pub fn presave_during_flatten_enabled() -> bool {
471    std::env::var(ENV_PRESAVE_DURING_FLATTEN).ok().as_deref() == Some("1")
472}
473
474/// **D1.B gated allow.** Same as [`activity_allowed_for_sandbox`], but
475/// also accepts `Some("preSave")` when `presave_gate` is true.
476///
477/// `presave_gate` is computed once per flatten (via
478/// [`presave_during_flatten_enabled`]) and threaded down so the dispatch
479/// path can decide deterministically per script; the host-binding layer
480/// receives the same bool via [`HostBindings::set_presave_gate`].
481///
482/// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
483pub fn activity_allowed_for_sandbox_with_gate(activity: Option<&str>, presave_gate: bool) -> bool {
484    if activity_allowed_for_sandbox(activity) {
485        return true;
486    }
487    presave_gate && matches!(activity, Some("preSave"))
488}
489
490/// The host-side adapter the dispatch path calls. A minimal contract
491/// chosen so that swapping backends (rquickjs ↔ boa ↔ external sandbox)
492/// is one Cargo feature flag away.
493pub trait XfaJsRuntime {
494    /// One-time initialisation. Idempotent.
495    fn init(&mut self) -> Result<(), SandboxError>;
496
497    /// Reset per-document state (memory budget, instruction counter,
498    /// any cached compiled scripts). Called once per flatten.
499    fn reset_for_new_document(&mut self) -> Result<(), SandboxError>;
500
501    /// Phase C: install the `FormTree` the runtime should resolve paths
502    /// against and mutate. The dispatch path owns the mutable borrow and clears
503    /// the handle before returning.
504    fn set_form_handle(
505        &mut self,
506        _form: *mut FormTree,
507        _root_id: FormNodeId,
508    ) -> Result<(), SandboxError> {
509        Ok(())
510    }
511
512    /// Phase D-γ: install a read-only view of the `DataDom` for the current
513    /// document. Called once per document after `set_form_handle`, before any
514    /// scripts run. Default: no-op (backends without DataDom support ignore it).
515    ///
516    /// # Safety
517    /// Callers **must** guarantee that `dom` outlives all script execution for
518    /// this document (i.e. it must remain alive until `set_form_handle(null)`
519    /// is called). The runtime stores the pointer read-only and never writes
520    /// through it.
521    fn set_data_handle(&mut self, _dom: *const DataDom) {}
522
523    /// BE-1 tranche #1 (benign zero-instance SOM): install the set of
524    /// template-declared container names (`subform`/`subformSet`/`exclGroup`/
525    /// `area`) for the current document. Backends that resolve implicit SOM
526    /// identifiers use it to return a benign empty-node façade for a
527    /// declared-but-absent reference instead of `undefined` (Adobe semantics),
528    /// so guarded scripts (`if (!Sub.Child.isNull) {...} else {...}`) run their
529    /// else branch instead of throwing. Like [`set_data_handle`], the caller
530    /// installs this before script execution. Default: no-op (the static
531    /// `NullRuntime` ignores it, so the default/non-sandboxed path is
532    /// unaffected and stays byte-identical).
533    ///
534    /// [`set_data_handle`]: XfaJsRuntime::set_data_handle
535    fn set_declared_subform_names(&mut self, _names: std::collections::HashSet<String>) {}
536
537    /// Phase C: reset per-script host counters and install the current script
538    /// context node / activity. Backends without host bindings ignore it.
539    fn reset_per_script(
540        &mut self,
541        _current_id: FormNodeId,
542        _activity: Option<&str>,
543    ) -> Result<(), SandboxError> {
544        Ok(())
545    }
546
547    /// Phase C page-count foundation. The current flatten order runs scripts
548    /// before layout, so callers normally leave this at 0.
549    fn set_static_page_count(&mut self, _page_count: u32) -> Result<(), SandboxError> {
550        Ok(())
551    }
552
553    /// **D1.B gated allow.** Inform the runtime whether the
554    /// `XFA_PRESAVE_DURING_FLATTEN=1` opt-in is active for the current
555    /// flatten. The dispatch path computes this once per document via
556    /// [`presave_during_flatten_enabled`] and forwards it here so the host
557    /// binding layer can mirror the dispatch decision (defence-in-depth).
558    ///
559    /// Default: no-op. Backends without a host-binding gate ignore it.
560    /// The contract for backends that DO mirror the gate:
561    ///
562    /// 1. Default OFF: every call to [`HostBindings::write_activity_allowed`]
563    ///    with `current_activity = Some("preSave")` MUST return false.
564    /// 2. Gate ON: the same call MUST return true ONLY for `Some("preSave")`.
565    ///    Every other denylist activity (`preSubmit`, `click`, …) MUST
566    ///    continue to return false.
567    ///
568    /// Cross-ref: `docs/INST_MGR_ACTIVITY_POLICY.md` v5 §6.1 (D1.B).
569    fn set_presave_gate(&mut self, _enabled: bool) {}
570
571    /// Execute one script body inside the sandbox.
572    ///
573    /// `activity` is the enclosing `<event activity="...">` value if
574    /// any. The dispatch site has already filtered against
575    /// [`activity_allowed_for_sandbox`]; backends may treat unknown
576    /// activities as `PhaseDenied` for defence-in-depth.
577    fn execute_script(
578        &mut self,
579        activity: Option<&str>,
580        body: &str,
581    ) -> Result<RuntimeOutcome, SandboxError>;
582
583    /// Take the cumulative metadata since the last `take_metadata`
584    /// call (or since `reset_for_new_document`, whichever was later).
585    fn take_metadata(&mut self) -> RuntimeMetadata;
586
587    /// D6: drain captured `occur.min`/`occur.max` write intents
588    /// `(node_index, prop, value)` recorded during the script pass. The
589    /// dispatch path applies them (only when `XFA_OCCUR_APPLY=1`) after the
590    /// rollback decision. Default: none (non-sandboxed runtimes capture nothing).
591    fn take_occur_mutations(&mut self) -> Vec<(usize, String, i64)> {
592        Vec::new()
593    }
594
595    /// Epic A E-2/E-3: drain verbose per-entry diagnostic logs. Only
596    /// populated when `XFA_RUNTIME_DIAG=1` is set at the host call sites.
597    /// Default impl returns empty logs (NullRuntime, non-sandboxed paths).
598    fn take_diag_logs(&mut self) -> RuntimeDiagLogs {
599        RuntimeDiagLogs::default()
600    }
601}
602
#[cfg(test)]
mod tests {
    use super::*;

    // Despite the historical name, this pins every activity the base
    // allowlist is expected to admit, not only `initialize` and
    // `calculate`. Each assertion names the activity so a regression
    // points straight at the offending entry.
    #[test]
    fn allowlist_accepts_initialize_and_calculate() {
        for lifecycle in [
            "initialize",
            "calculate",
            "validate",
            "docReady",
            "layoutReady",
        ] {
            assert!(
                activity_allowed_for_sandbox(Some(lifecycle)),
                "{lifecycle} must be allowed",
            );
        }
    }

    // D1.B gate: with the gate bool OFF,
    // `activity_allowed_for_sandbox_with_gate` must behave exactly like
    // `activity_allowed_for_sandbox`. The gate value is passed explicitly
    // here, so no environment variable is consulted by this test. When the
    // gate bool is wired ON, ONLY `preSave` flips to allowed — `preSubmit`,
    // `click`, etc. stay denied (hard stop in §6.1 of the policy doc); that
    // half is covered by `presave_gate_on_unlocks_only_presave`.
    #[test]
    fn presave_gate_off_matches_base_allowlist() {
        for allowed in SANDBOX_ACTIVITY_ALLOWLIST {
            assert!(
                activity_allowed_for_sandbox_with_gate(Some(allowed), false),
                "{allowed} must stay allowed with D1.B gate OFF",
            );
            // Pin the documented equivalence against the base predicate
            // itself, not just against the expected literal.
            assert_eq!(
                activity_allowed_for_sandbox_with_gate(Some(allowed), false),
                activity_allowed_for_sandbox(Some(allowed)),
                "gate OFF must match the base allowlist for {allowed}",
            );
        }
        for denied in [
            "preSave",
            "preSubmit",
            "click",
            "mouseEnter",
            "exit",
            "postSave",
        ] {
            assert!(
                !activity_allowed_for_sandbox_with_gate(Some(denied), false),
                "{denied} must stay denied with D1.B gate OFF",
            );
            assert_eq!(
                activity_allowed_for_sandbox_with_gate(Some(denied), false),
                activity_allowed_for_sandbox(Some(denied)),
                "gate OFF must match the base allowlist for {denied}",
            );
        }
        // A missing activity attribute is never sandbox-eligible.
        assert!(!activity_allowed_for_sandbox_with_gate(None, false));
        assert_eq!(
            activity_allowed_for_sandbox_with_gate(None, false),
            activity_allowed_for_sandbox(None),
            "gate OFF must match the base allowlist for a missing activity",
        );
    }

    #[test]
    fn presave_gate_on_unlocks_only_presave() {
        // preSave flips from deny -> allow when the gate is ON.
        assert!(
            activity_allowed_for_sandbox_with_gate(Some("preSave"), true),
            "preSave must be allowed with D1.B gate ON",
        );
        // Hard-stop: every other denylist activity MUST stay denied.
        for still_denied in [
            "preSubmit",
            "click",
            "mouseEnter",
            "mouseExit",
            "exit",
            "enter",
            "change",
            "postSave",
            "postSubmit",
            "ready",
            "prePrint",
            "postPrint",
            "preOpen",
            "full",
        ] {
            assert!(
                !activity_allowed_for_sandbox_with_gate(Some(still_denied), true),
                "{still_denied} must stay denied even with D1.B gate ON",
            );
        }
        assert!(!activity_allowed_for_sandbox_with_gate(None, true));
        // Turning the gate ON must not remove anything: every entry of the
        // base allowlist is still admitted.
        for allowed in SANDBOX_ACTIVITY_ALLOWLIST {
            assert!(
                activity_allowed_for_sandbox_with_gate(Some(allowed), true),
                "{allowed} must stay allowed with D1.B gate ON",
            );
        }
    }

    // Note: the env-var helper `presave_during_flatten_enabled` is pinned
    // by integration tests in `tests/m3b_phasePQ_presave_gated_w3repair_d1b.rs`
    // (`d1b_default_off_keeps_presave_denied_at_dispatch`,
    // `d1b_env_var_parsing_only_one_enables_gate`). Inline unit tests would
    // race with `std::env` because cargo test runs in-process; the
    // integration tests serialise env mutations behind a mutex guard.
    //
    // ENV_PRESAVE_DURING_FLATTEN constant is canonicalised at the const
    // declaration site and never re-spelled in code.

    #[test]
    fn presave_env_var_constant_is_canonical_name() {
        assert_eq!(ENV_PRESAVE_DURING_FLATTEN, "XFA_PRESAVE_DURING_FLATTEN");
    }

    #[test]
    fn allowlist_rejects_ui_and_submit_activities() {
        for ui in [
            "click",
            "mouseEnter",
            "mouseExit",
            "enter",
            "exit",
            "preSubmit",
            "postSubmit",
            "ready",
        ] {
            assert!(
                !activity_allowed_for_sandbox(Some(ui)),
                "{ui} must not be allowed",
            );
        }
        // A missing activity attribute is rejected as well.
        assert!(!activity_allowed_for_sandbox(None));
    }

    #[test]
    fn metadata_is_clean_when_zero() {
        assert!(RuntimeMetadata::default().is_clean());
        // A non-zero `executed` count alone must not mark the run dirty.
        let mut m = RuntimeMetadata {
            executed: 5,
            ..Default::default()
        };
        assert!(m.is_clean(), "executed counter does not flip cleanliness");
        // A recorded runtime error must mark it dirty.
        m.runtime_errors = 1;
        assert!(!m.is_clean());
    }

    // Intentional const-floor contract assertions: these pin the safety
    // floors of compile-time budget constants. assertions_on_constants is
    // expected and desired here.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn budget_constants_are_sane() {
        assert!(MAX_SCRIPT_BODY_BYTES >= 4096);
        assert!(DEFAULT_TIME_BUDGET_MS >= 25);
        assert!(DEFAULT_MEMORY_BUDGET_BYTES >= 1024 * 1024);
    }

    // QF1-E: the wall-time fallback multiplier must be strictly > 1 (for
    // an integer multiplier that is `>= 2`) so a clean interrupt at the
    // budget boundary cannot be reclassified as WallTimeExceeded. 5 is the
    // documented default; this pins it as a contract between this module
    // and the backend.
    // The const-assert avoids `clippy::assertions_on_constants` and is
    // evaluated at compile time. Because it lives inside this
    // `#[cfg(test)]` module, it fails *test* builds (not regular builds)
    // when the safety floor is broken.
    const _WALLTIME_SAFE_MIN: () = assert!(
        WALLTIME_FALLBACK_MULTIPLIER_DEFAULT >= 2,
        "multiplier < 2 would re-label normal Timeouts as WallTimeExceeded"
    );

    #[test]
    fn walltime_fallback_multiplier_default_is_safe() {
        // The compile-time `_WALLTIME_SAFE_MIN` const above is the real
        // contract; this runtime test pins the concrete value so a
        // refactor that changes the default but leaves the floor intact
        // is still caught here.
        assert_eq!(WALLTIME_FALLBACK_MULTIPLIER_DEFAULT, 5);
    }

    #[test]
    fn walltime_fallback_env_var_name_is_canonical() {
        assert_eq!(
            ENV_WALLTIME_FALLBACK_MULTIPLIER,
            "XFA_JS_WALLTIME_FALLBACK_MULTIPLIER"
        );
    }

    // W2-B: variables-script body cap must be strictly higher than the
    // event-script cap so XFA helper libraries (validateForm, LOV,
    // CoreFunctions) register; it must still be bounded so an oversize
    // body cannot bypass static defence-in-depth before the per-document
    // memory budget engages.
    // Intentional const-floor contract assertions on compile-time caps.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn variables_script_cap_is_above_event_cap_and_bounded() {
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES > MAX_SCRIPT_BODY_BYTES,
            "variables-script cap must exceed event-script cap"
        );
        // Sanity: must be high enough to register the largest observed
        // real-world variables-script library (LOV ≈ 507 KB).
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES >= 768 * 1024,
            "variables-script cap below observed real-world max"
        );
        // Sanity: must be bounded well under the per-document memory
        // budget (at most one eighth of it) so a single oversize body
        // cannot consume the entire budget on parse alone.
        assert!(
            MAX_VARIABLES_SCRIPT_BODY_BYTES <= DEFAULT_MEMORY_BUDGET_BYTES / 8,
            "variables-script cap must stay an order of magnitude below memory budget"
        );
    }
}
pdf_xfa/js_runtime/mod.rs

pdf_xfa/js_runtime/
mod.rs