// xfa_layout_engine/ir/mod.rs

//! LayoutTreeIR — canonical, deterministic intermediate representation of XFA layout state.
//!
//! This module is part of M1 (Observability Foundation). It exists so that
//! XFA fidelity debugging can reason about a stable, snapshottable tree of
//! layout decisions instead of comparing rasterised PDFs page by page.
//!
//! ## Design constraints
//!
//! - **No new dependencies.** All types are plain Rust; the canonical-JSON
//!   serializer in [`canonical_json`] is hand-rolled and uses only `std`.
//! - **Deterministic by construction.** The IR has no `HashMap`/`HashSet`,
//!   no `Instant::now`, no RNG, and no process-id leakage. Children are
//!   ordered by an explicit `Vec`. Object keys in the JSON output are
//!   emitted alphabetically. Floats are formatted with fixed precision.
//! - **Off by default.** Constructing an IR is opt-in: the engine never
//!   builds one unless a caller asks for it. There is no global state.
//! - **Stable schema.** [`SCHEMA_VERSION`] is part of the JSON output.
//!   Field additions bump the version; field renames bump the major (if
//!   we ever ship a v2). v1 is intentionally minimal.
//!
//! ## Out of scope (M1 v1)
//!
//! - Population from a real XFA pipeline. v1 ships the data types, a
//!   deterministic serializer, and a synthetic-fixture snapshot harness.
//!   Wiring the engine to populate `LayoutTreeIR` from a real
//!   `flatten_xfa_to_pdf` call is deferred to a follow-up.
//! - Renderer state, font tables, PDF object refs. The IR sits *before*
//!   rasterisation; it captures what the engine decided, not what was
//!   drawn.

31pub mod canonical_json;
32pub mod version;
33
34pub use version::SCHEMA_VERSION;
35
36use crate::types::Rect;
37
/// Deterministic identifier for a layout node, derived from its position
/// in the tree (sequence of child indices from the root).
///
/// Root has an empty path. The first child of root is `"0"`. The third
/// child of the second child of root is `"1/2"`. The encoding is
/// human-readable and stable across runs and platforms.
///
/// The derived `Ord`/`PartialOrd` compare the underlying path string
/// lexicographically, so `"10"` sorts before `"2"`; the ordering is stable
/// but is not the same as numeric child order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct LayoutNodeId(String);

impl LayoutNodeId {
    /// The identifier of the tree's root (the empty path).
    pub fn root() -> Self {
        Self(String::new())
    }

    /// Derive the identifier of the `index`-th child of `self`.
    ///
    /// `index` is 0-based. Segments are joined with `/`; a child of the
    /// root gets no leading separator.
    pub fn child(&self, index: usize) -> Self {
        if self.0.is_empty() {
            // Direct child of the root: the path is just the index.
            Self(index.to_string())
        } else {
            Self(format!("{}/{}", self.0, index))
        }
    }

    /// Borrow the path string. Empty for the root.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

/// Kind of a layout node in the IR.
///
/// `Other` covers anything we don't classify yet; it intentionally does not
/// carry payload because v1 of the IR aims for a closed, reviewable enum.
/// `Other` is also the `Default` variant.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum NodeKind {
    /// The root sentinel of the layout tree.
    Root,
    /// `pageSet` container.
    PageSet,
    /// `pageArea` container.
    PageArea,
    /// `contentArea` container.
    ContentArea,
    /// `subform` container.
    Subform,
    /// `subformSet` container.
    SubformSet,
    /// Form `field`.
    Field,
    /// `draw` (static decoration).
    Draw,
    /// `exclGroup` (radio-button-like exclusive group).
    ExclGroup,
    /// Anything not yet classified.
    #[default]
    Other,
}

impl NodeKind {
    /// Stable string tag used in canonical JSON output.
    ///
    /// Tags are the snake_case form of the variant name.
    pub fn tag(self) -> &'static str {
        match self {
            NodeKind::Root => "root",
            NodeKind::PageSet => "page_set",
            NodeKind::PageArea => "page_area",
            NodeKind::ContentArea => "content_area",
            NodeKind::Subform => "subform",
            NodeKind::SubformSet => "subform_set",
            NodeKind::Field => "field",
            NodeKind::Draw => "draw",
            NodeKind::ExclGroup => "excl_group",
            NodeKind::Other => "other",
        }
    }
}

/// Visibility/presence state.
///
/// Mirrors the XFA `presence` attribute plus an explicit `Unknown` for
/// snapshots taken before resolution. `Visible` is the `Default` variant.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum PresenceIR {
    /// Default; rendered.
    #[default]
    Visible,
    /// Hidden via `presence="hidden"`.
    Hidden,
    /// Inactive (not rendered, not data-bound).
    Inactive,
    /// Invisible (skipped during rendering but still in the tree).
    Invisible,
    /// Presence not determined yet at the snapshot point.
    Unknown,
}

impl PresenceIR {
    /// Stable string tag used in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            PresenceIR::Visible => "visible",
            PresenceIR::Hidden => "hidden",
            PresenceIR::Inactive => "inactive",
            PresenceIR::Invisible => "invisible",
            PresenceIR::Unknown => "unknown",
        }
    }
}

/// Field-specific kind tag for nodes whose [`NodeKind`] is `Field`.
///
/// Other nodes carry `None` for [`LayoutNode::field_kind`]. This enum has no
/// `Default`; absence is expressed through that `Option`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FieldKindIR {
    /// Plain text edit.
    Text,
    /// Numeric edit.
    NumericEdit,
    /// Choice list / dropdown.
    Choice,
    /// Date/time edit.
    DateTime,
    /// Signature field.
    Signature,
    /// Barcode field.
    Barcode,
    /// Image field.
    Image,
    /// Button.
    Button,
    /// Anything not yet classified.
    Other,
}

impl FieldKindIR {
    /// Stable string tag used in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            FieldKindIR::Text => "text",
            FieldKindIR::NumericEdit => "numeric_edit",
            FieldKindIR::Choice => "choice",
            FieldKindIR::DateTime => "date_time",
            FieldKindIR::Signature => "signature",
            FieldKindIR::Barcode => "barcode",
            FieldKindIR::Image => "image",
            FieldKindIR::Button => "button",
            FieldKindIR::Other => "other",
        }
    }
}

/// Overflow / split state of a node at IR-capture time.
///
/// `None` (no overflow) is the `Default` variant.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum OverflowState {
    /// No overflow.
    #[default]
    None,
    /// Container split across pages.
    Split,
    /// Deferred entirely to the next page.
    DeferredToNextPage,
    /// Clipped to its parent's content area.
    Clipped,
    /// Overflow status not determined yet at the snapshot point.
    Unknown,
}

impl OverflowState {
    /// Stable string tag used in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            OverflowState::None => "none",
            OverflowState::Split => "split",
            OverflowState::DeferredToNextPage => "deferred_to_next_page",
            OverflowState::Clipped => "clipped",
            OverflowState::Unknown => "unknown",
        }
    }
}

222/// One node in the layout tree IR.
223///
224/// All fields are public so that consumers can construct synthetic IRs in
225/// tests without going through a builder. Construction order does not
226/// affect serialization order — the canonical-JSON serializer always
227/// emits object keys alphabetically.
228#[derive(Debug, Clone, PartialEq, Default)]
229pub struct LayoutNode {
230    /// Stable, path-derived identifier (see [`LayoutNodeId`]).
231    pub id: LayoutNodeId,
232    /// Coarse kind of this node.
233    pub kind: NodeKind,
234    /// Optional Scripting Object Model (SOM) path or name.
235    pub som: Option<String>,
236    /// Optional page index this node was placed on, 0-based.
237    pub page_index: Option<u32>,
238    /// Optional layout rectangle in points (1pt = 1/72 inch).
239    pub rect: Option<Rect>,
240    /// Presence/visibility state.
241    pub presence: PresenceIR,
242    /// Field-specific kind tag, only meaningful when `kind == Field`.
243    pub field_kind: Option<FieldKindIR>,
244    /// Optional 16-hex-char prefix of a value/text hash. We never serialize
245    /// the raw value to keep snapshots small and to avoid leaking PII into
246    /// repository-tracked goldens.
247    pub value_hash: Option<String>,
248    /// Overflow/split state at IR-capture time.
249    pub overflow: OverflowState,
250    /// Optional cross-reference to a `FormNodeId.0` value, useful when
251    /// correlating IR snapshots against the original FormDOM.
252    pub form_node_id: Option<u64>,
253    /// Children, in their layout order.
254    pub children: Vec<LayoutNode>,
255}
256
257impl LayoutNode {
258    /// Convenience constructor for a node with default fields and a given id+kind.
259    pub fn new(id: LayoutNodeId, kind: NodeKind) -> Self {
260        Self {
261            id,
262            kind,
263            ..Default::default()
264        }
265    }
266
267    /// Add a child and return its position.
268    ///
269    /// The caller is responsible for using [`LayoutNodeId::child`] to derive
270    /// the child's id; this method does not mutate the child.
271    pub fn push_child(&mut self, child: LayoutNode) -> usize {
272        self.children.push(child);
273        self.children.len() - 1
274    }
275}
276
277/// Top-level IR document.
278#[derive(Debug, Clone, PartialEq)]
279pub struct LayoutTreeIR {
280    /// Schema version of this IR document. See [`SCHEMA_VERSION`].
281    pub schema_version: u32,
282    /// Root layout node.
283    pub root: LayoutNode,
284}
285
286impl Default for LayoutTreeIR {
287    fn default() -> Self {
288        Self {
289            schema_version: SCHEMA_VERSION,
290            root: LayoutNode::new(LayoutNodeId::root(), NodeKind::Root),
291        }
292    }
293}
294
295impl LayoutTreeIR {
296    /// Build an empty IR rooted at a `Root` node.
297    pub fn new() -> Self {
298        Self::default()
299    }
300
301    /// Render this IR to a canonical, deterministic JSON string.
302    ///
303    /// See [`canonical_json`] for the formatting contract.
304    pub fn to_canonical_json(&self) -> String {
305        let mut out = String::new();
306        canonical_json::write_tree(&mut out, self);
307        out
308    }
309
310    /// Total number of nodes in the tree, including the root.
311    pub fn node_count(&self) -> usize {
312        fn walk(n: &LayoutNode) -> usize {
313            1 + n.children.iter().map(walk).sum::<usize>()
314        }
315        walk(&self.root)
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// The root identifier is the empty path.
    #[test]
    fn root_id_is_empty() {
        assert_eq!(LayoutNodeId::root().as_str(), "");
    }

    /// Child identifiers are `/`-joined index paths.
    #[test]
    fn child_id_is_path() {
        let r = LayoutNodeId::root();
        let c0 = r.child(0);
        let c01 = c0.child(1);
        assert_eq!(c0.as_str(), "0");
        assert_eq!(c01.as_str(), "0/1");
    }

    /// A freshly built tree holds only the root and the current schema version.
    #[test]
    fn empty_tree_node_count_is_one() {
        let tree = LayoutTreeIR::new();
        assert_eq!(tree.node_count(), 1);
        assert_eq!(tree.schema_version, SCHEMA_VERSION);
    }

    /// `node_count` counts every node: root + page area + two leaves.
    #[test]
    fn synthetic_tree_node_count() {
        let mut root = LayoutNode::new(LayoutNodeId::root(), NodeKind::Root);
        let mut p0 = LayoutNode::new(root.id.child(0), NodeKind::PageArea);
        p0.push_child(LayoutNode::new(p0.id.child(0), NodeKind::ContentArea));
        p0.push_child(LayoutNode::new(p0.id.child(1), NodeKind::Field));
        root.push_child(p0);
        let tree = LayoutTreeIR {
            schema_version: SCHEMA_VERSION,
            root,
        };
        assert_eq!(tree.node_count(), 4);
    }

    /// A default node is visible.
    #[test]
    fn presence_default_is_visible() {
        let n = LayoutNode::default();
        assert_eq!(n.presence, PresenceIR::Visible);
    }

    /// A default node has no overflow.
    #[test]
    fn overflow_default_is_none() {
        let n = LayoutNode::default();
        assert_eq!(n.overflow, OverflowState::None);
    }
}