xfa_layout_engine/ir/mod.rs
1//! LayoutTreeIR — canonical, deterministic intermediate representation of XFA layout state.
2//!
3//! This module is part of M1 (Observability Foundation). It exists so that
4//! XFA fidelity debugging can reason about a stable, snapshottable tree of
5//! layout decisions instead of comparing rasterised PDFs page by page.
6//!
7//! ## Design constraints
8//!
9//! - **No new dependencies.** All types are plain Rust; the canonical-JSON
10//! serializer in [`canonical_json`] is hand-rolled and uses only `std`.
11//! - **Deterministic by construction.** The IR has no `HashMap`/`HashSet`,
12//! no `Instant::now`, no RNG, and no process-id leakage. Children are
13//! ordered by an explicit `Vec`. Object keys in the JSON output are
14//! emitted alphabetically. Floats are formatted with fixed precision.
15//! - **Off by default.** Constructing an IR is opt-in: the engine never
16//! builds one unless a caller asks for it. There is no global state.
17//! - **Stable schema.** [`SCHEMA_VERSION`] is part of the JSON output.
//! Field additions bump the version; field renames bump the major (if
//! we ever ship a v2). v1 is intentionally minimal.
20//!
21//! ## Out of scope (M1 v1)
22//!
23//! - Population from a real XFA pipeline. v1 ships the data types, a
24//! deterministic serializer, and a synthetic-fixture snapshot harness.
25//! Wiring the engine to populate `LayoutTreeIR` from a real
26//! `flatten_xfa_to_pdf` call is deferred to a follow-up.
27//! - Renderer state, font tables, PDF object refs. The IR sits *before*
28//! rasterisation; it captures what the engine decided, not what was
29//! drawn.
30
31pub mod canonical_json;
32pub mod version;
33
34pub use version::SCHEMA_VERSION;
35
36use crate::types::Rect;
37
/// Path-based, deterministic identifier of a node in the layout tree.
///
/// The identifier is the `/`-separated list of child indices walked from
/// the root down to the node. The root itself is the empty string, the
/// root's first child is `"0"`, and the third child of the root's second
/// child is `"1/2"`. Only the tree position feeds into the value.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct LayoutNodeId(String);

impl LayoutNodeId {
    /// Returns the id of the root node, i.e. the empty path.
    pub fn root() -> Self {
        // The derived `Default` yields an empty `String`, which is the root path.
        Self::default()
    }

    /// Returns the id of the child at position `index` beneath `self`.
    ///
    /// Directly under the root the result is just the index; deeper down
    /// the index is appended to the parent path after a `/` separator.
    pub fn child(&self, index: usize) -> Self {
        let segment = index.to_string();
        if self.0.is_empty() {
            // No parent path yet, so no separator is needed.
            return Self(segment);
        }

        let mut path = String::with_capacity(self.0.len() + 1 + segment.len());
        path.push_str(&self.0);
        path.push('/');
        path.push_str(&segment);
        Self(path)
    }

    /// Returns the underlying path text (`""` for the root).
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
67
/// Coarse classification of a node in the layout IR.
///
/// Unclassified nodes fall into `Other`, which deliberately carries no
/// payload so that the v1 enum stays small and easy to review.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum NodeKind {
    /// Sentinel node at the top of the layout tree.
    Root,
    /// A `pageSet` container.
    PageSet,
    /// A `pageArea` container.
    PageArea,
    /// A `contentArea` container.
    ContentArea,
    /// A `subform` container.
    Subform,
    /// A `subformSet` container.
    SubformSet,
    /// A form `field`.
    Field,
    /// A `draw` element (static decoration).
    Draw,
    /// An `exclGroup` (radio-button-like exclusive group).
    ExclGroup,
    /// Fallback for anything without a dedicated variant; also the default.
    #[default]
    Other,
}

impl NodeKind {
    /// Returns the snake_case tag that represents this kind in canonical JSON.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Root => "root",
            Self::PageSet => "page_set",
            Self::PageArea => "page_area",
            Self::ContentArea => "content_area",
            Self::Subform => "subform",
            Self::SubformSet => "subform_set",
            Self::Field => "field",
            Self::Draw => "draw",
            Self::ExclGroup => "excl_group",
            Self::Other => "other",
        }
    }
}
115
/// Presence (visibility) state recorded for a node.
///
/// Follows the XFA `presence` attribute and adds an explicit `Unknown`
/// for snapshots captured before presence has been resolved.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum PresenceIR {
    /// Rendered; this is the default.
    #[default]
    Visible,
    /// Hidden through `presence="hidden"`.
    Hidden,
    /// Inactive: neither rendered nor data-bound.
    Inactive,
    /// Invisible: skipped while rendering but kept in the tree.
    Invisible,
    /// Presence had not been determined when the snapshot was taken.
    Unknown,
}

impl PresenceIR {
    /// Returns the lowercase tag that represents this state in canonical JSON.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Visible => "visible",
            Self::Hidden => "hidden",
            Self::Inactive => "inactive",
            Self::Invisible => "invisible",
            Self::Unknown => "unknown",
        }
    }
}
148
/// Finer-grained kind for nodes whose [`NodeKind`] is `Field`.
///
/// Nodes of any other kind leave [`LayoutNode::field_kind`] as `None`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FieldKindIR {
    /// Plain text edit.
    Text,
    /// Numeric edit.
    NumericEdit,
    /// Choice list or dropdown.
    Choice,
    /// Date/time edit.
    DateTime,
    /// Signature field.
    Signature,
    /// Barcode field.
    Barcode,
    /// Image field.
    Image,
    /// Button.
    Button,
    /// Fallback for field kinds without a dedicated variant.
    Other,
}

impl FieldKindIR {
    /// Returns the snake_case tag that represents this field kind in canonical JSON.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::NumericEdit => "numeric_edit",
            Self::Choice => "choice",
            Self::DateTime => "date_time",
            Self::Signature => "signature",
            Self::Barcode => "barcode",
            Self::Image => "image",
            Self::Button => "button",
            Self::Other => "other",
        }
    }
}
191
/// Overflow or split status of a node at the moment the IR was captured.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum OverflowState {
    /// The node did not overflow; this is the default.
    #[default]
    None,
    /// The container was split across pages.
    Split,
    /// The node was deferred entirely to the next page.
    DeferredToNextPage,
    /// The node was clipped to its parent's content area.
    Clipped,
    /// Overflow status had not been determined when the snapshot was taken.
    Unknown,
}

impl OverflowState {
    /// Returns the snake_case tag that represents this state in canonical JSON.
    pub fn tag(self) -> &'static str {
        // `Self::None` is spelled out in full so it cannot be confused with `Option::None`.
        match self {
            Self::None => "none",
            Self::Split => "split",
            Self::DeferredToNextPage => "deferred_to_next_page",
            Self::Clipped => "clipped",
            Self::Unknown => "unknown",
        }
    }
}
221
222/// One node in the layout tree IR.
223///
224/// All fields are public so that consumers can construct synthetic IRs in
225/// tests without going through a builder. Construction order does not
226/// affect serialization order — the canonical-JSON serializer always
227/// emits object keys alphabetically.
228#[derive(Debug, Clone, PartialEq, Default)]
229pub struct LayoutNode {
230 /// Stable, path-derived identifier (see [`LayoutNodeId`]).
231 pub id: LayoutNodeId,
232 /// Coarse kind of this node.
233 pub kind: NodeKind,
234 /// Optional Scripting Object Model (SOM) path or name.
235 pub som: Option<String>,
236 /// Optional page index this node was placed on, 0-based.
237 pub page_index: Option<u32>,
238 /// Optional layout rectangle in points (1pt = 1/72 inch).
239 pub rect: Option<Rect>,
240 /// Presence/visibility state.
241 pub presence: PresenceIR,
242 /// Field-specific kind tag, only meaningful when `kind == Field`.
243 pub field_kind: Option<FieldKindIR>,
244 /// Optional 16-hex-char prefix of a value/text hash. We never serialize
245 /// the raw value to keep snapshots small and to avoid leaking PII into
246 /// repository-tracked goldens.
247 pub value_hash: Option<String>,
248 /// Overflow/split state at IR-capture time.
249 pub overflow: OverflowState,
250 /// Optional cross-reference to a `FormNodeId.0` value, useful when
251 /// correlating IR snapshots against the original FormDOM.
252 pub form_node_id: Option<u64>,
253 /// Children, in their layout order.
254 pub children: Vec<LayoutNode>,
255}
256
257impl LayoutNode {
258 /// Convenience constructor for a node with default fields and a given id+kind.
259 pub fn new(id: LayoutNodeId, kind: NodeKind) -> Self {
260 Self {
261 id,
262 kind,
263 ..Default::default()
264 }
265 }
266
267 /// Add a child and return its position.
268 ///
269 /// The caller is responsible for using [`LayoutNodeId::child`] to derive
270 /// the child's id; this method does not mutate the child.
271 pub fn push_child(&mut self, child: LayoutNode) -> usize {
272 self.children.push(child);
273 self.children.len() - 1
274 }
275}
276
277/// Top-level IR document.
278#[derive(Debug, Clone, PartialEq)]
279pub struct LayoutTreeIR {
280 /// Schema version of this IR document. See [`SCHEMA_VERSION`].
281 pub schema_version: u32,
282 /// Root layout node.
283 pub root: LayoutNode,
284}
285
286impl Default for LayoutTreeIR {
287 fn default() -> Self {
288 Self {
289 schema_version: SCHEMA_VERSION,
290 root: LayoutNode::new(LayoutNodeId::root(), NodeKind::Root),
291 }
292 }
293}
294
295impl LayoutTreeIR {
296 /// Build an empty IR rooted at a `Root` node.
297 pub fn new() -> Self {
298 Self::default()
299 }
300
301 /// Render this IR to a canonical, deterministic JSON string.
302 ///
303 /// See [`canonical_json`] for the formatting contract.
304 pub fn to_canonical_json(&self) -> String {
305 let mut out = String::new();
306 canonical_json::write_tree(&mut out, self);
307 out
308 }
309
310 /// Total number of nodes in the tree, including the root.
311 pub fn node_count(&self) -> usize {
312 fn walk(n: &LayoutNode) -> usize {
313 1 + n.children.iter().map(walk).sum::<usize>()
314 }
315 walk(&self.root)
316 }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// The root id is the empty path.
    #[test]
    fn root_id_is_empty() {
        let root_id = LayoutNodeId::root();
        assert_eq!(root_id.as_str(), "");
    }

    /// Child ids are `/`-joined index paths.
    #[test]
    fn child_id_is_path() {
        let first = LayoutNodeId::root().child(0);
        let nested = first.child(1);
        assert_eq!(first.as_str(), "0");
        assert_eq!(nested.as_str(), "0/1");
    }

    /// A freshly created tree holds only the root and the current schema version.
    #[test]
    fn empty_tree_node_count_is_one() {
        let empty = LayoutTreeIR::new();
        assert_eq!(empty.node_count(), 1);
        assert_eq!(empty.schema_version, SCHEMA_VERSION);
    }

    /// Root -> page area -> {content area, field} counts four nodes.
    #[test]
    fn synthetic_tree_node_count() {
        let mut root = LayoutNode::new(LayoutNodeId::root(), NodeKind::Root);
        let mut page = LayoutNode::new(root.id.child(0), NodeKind::PageArea);

        let content_id = page.id.child(0);
        page.push_child(LayoutNode::new(content_id, NodeKind::ContentArea));
        let field_id = page.id.child(1);
        page.push_child(LayoutNode::new(field_id, NodeKind::Field));
        root.push_child(page);

        let tree = LayoutTreeIR {
            schema_version: SCHEMA_VERSION,
            root,
        };
        assert_eq!(tree.node_count(), 4);
    }

    /// A default node is visible.
    #[test]
    fn presence_default_is_visible() {
        let node = LayoutNode::default();
        assert_eq!(node.presence, PresenceIR::Visible);
    }

    /// A default node has no overflow.
    #[test]
    fn overflow_default_is_none() {
        let node = LayoutNode::default();
        assert_eq!(node.overflow, OverflowState::None);
    }
}