xfa-layout-engine 1.0.0-beta.5

Box-model and pagination layout engine for XFA forms. Experimental — part of the PDFluent XFA stack, under active development.
Documentation
//! LayoutTreeIR — canonical, deterministic intermediate representation of XFA layout state.
//!
//! This module is part of M1 (Observability Foundation). It exists so that
//! XFA fidelity debugging can reason about a stable, snapshottable tree of
//! layout decisions instead of comparing rasterised PDFs page by page.
//!
//! ## Design constraints
//!
//! - **No new dependencies.** All types are plain Rust; the canonical-JSON
//!   serializer in [`canonical_json`] is hand-rolled and uses only `std`.
//! - **Deterministic by construction.** The IR has no `HashMap`/`HashSet`,
//!   no `Instant::now`, no RNG, and no process-id leakage. Children are
//!   ordered by an explicit `Vec`. Object keys in the JSON output are
//!   emitted alphabetically. Floats are formatted with fixed precision.
//! - **Off by default.** Constructing an IR is opt-in: the engine never
//!   builds one unless a caller asks for it. There is no global state.
//! - **Stable schema.** [`SCHEMA_VERSION`] is part of the JSON output.
//!   Field additions bump the version; field renames bump the major
//!   version (if we ever ship a v2). v1 is intentionally minimal.
//!
//! ## Out of scope (M1 v1)
//!
//! - Population from a real XFA pipeline. v1 ships the data types, a
//!   deterministic serializer, and a synthetic-fixture snapshot harness.
//!   Wiring the engine to populate `LayoutTreeIR` from a real
//!   `flatten_xfa_to_pdf` call is deferred to a follow-up.
//! - Renderer state, font tables, PDF object refs. The IR sits *before*
//!   rasterisation; it captures what the engine decided, not what was
//!   drawn.

pub mod canonical_json;
pub mod version;

pub use version::SCHEMA_VERSION;

use crate::types::Rect;

/// Position-derived, deterministic identifier of a layout node.
///
/// The identifier is the `/`-separated list of child indices walked from
/// the root down to the node. The root itself is the empty string, the
/// root's first child is `"0"`, and the third child of the root's second
/// child is `"1/2"`. Because it only depends on tree position, the value
/// is human-readable and identical across runs and platforms.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct LayoutNodeId(String);

impl LayoutNodeId {
    /// Identifier of the root node (the empty path).
    pub fn root() -> Self {
        // The derived `Default` wraps an empty `String`, which is the root path.
        Self::default()
    }

    /// Identifier of the child at position `index` underneath `self`.
    ///
    /// A child of the root is just its index; deeper children append
    /// `/index` to the parent's path.
    pub fn child(&self, index: usize) -> Self {
        let path = match self.0.as_str() {
            // Root has no prefix, so no separator is emitted.
            "" => index.to_string(),
            parent => format!("{parent}/{index}"),
        };
        Self(path)
    }

    /// View the underlying path as a string slice (`""` for the root).
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

/// Coarse classification of a node in the layout IR.
///
/// The enum is deliberately payload-free: anything that has no dedicated
/// variant yet is reported as [`NodeKind::Other`], which is also the
/// default value.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum NodeKind {
    /// Sentinel node at the top of the layout tree.
    Root,
    /// A `pageSet` container.
    PageSet,
    /// A `pageArea` container.
    PageArea,
    /// A `contentArea` container.
    ContentArea,
    /// A `subform` container.
    Subform,
    /// A `subformSet` container.
    SubformSet,
    /// A form `field`.
    Field,
    /// A `draw` element (static decoration).
    Draw,
    /// An `exclGroup` (exclusive, radio-button-like group).
    ExclGroup,
    /// Fallback for nodes that are not classified yet.
    #[default]
    Other,
}

impl NodeKind {
    /// Fixed snake_case tag naming this kind in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Root => "root",
            Self::PageSet => "page_set",
            Self::PageArea => "page_area",
            Self::ContentArea => "content_area",
            Self::Subform => "subform",
            Self::SubformSet => "subform_set",
            Self::Field => "field",
            Self::Draw => "draw",
            Self::ExclGroup => "excl_group",
            Self::Other => "other",
        }
    }
}

/// Presence (visibility) state recorded for a node.
///
/// Follows the XFA `presence` attribute, with an additional `Unknown`
/// variant for snapshots captured before presence has been resolved.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum PresenceIR {
    /// Rendered; this is the default.
    #[default]
    Visible,
    /// Hidden through `presence="hidden"`.
    Hidden,
    /// Inactive: neither rendered nor data-bound.
    Inactive,
    /// Invisible: skipped while rendering but kept in the tree.
    Invisible,
    /// Presence had not been determined when the snapshot was taken.
    Unknown,
}

impl PresenceIR {
    /// Fixed lowercase tag naming this state in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Visible => "visible",
            Self::Hidden => "hidden",
            Self::Inactive => "inactive",
            Self::Invisible => "invisible",
            Self::Unknown => "unknown",
        }
    }
}

/// Finer-grained kind for nodes whose [`NodeKind`] is `Field`.
///
/// Nodes of any other kind leave [`LayoutNode::field_kind`] as `None`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FieldKindIR {
    /// Plain text edit field.
    Text,
    /// Numeric edit field.
    NumericEdit,
    /// Choice list or dropdown.
    Choice,
    /// Date/time edit field.
    DateTime,
    /// Signature field.
    Signature,
    /// Barcode field.
    Barcode,
    /// Image field.
    Image,
    /// Button.
    Button,
    /// Fallback for field kinds that are not classified yet.
    Other,
}

impl FieldKindIR {
    /// Fixed snake_case tag naming this field kind in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            Self::Text => "text",
            Self::NumericEdit => "numeric_edit",
            Self::Choice => "choice",
            Self::DateTime => "date_time",
            Self::Signature => "signature",
            Self::Barcode => "barcode",
            Self::Image => "image",
            Self::Button => "button",
            Self::Other => "other",
        }
    }
}

/// How a node overflowed (or was split) at the moment the IR was captured.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum OverflowState {
    /// The node did not overflow; this is the default.
    #[default]
    None,
    /// The container was split across pages.
    Split,
    /// The node was pushed to the next page in its entirety.
    DeferredToNextPage,
    /// The node was clipped to its parent's content area.
    Clipped,
    /// Overflow had not been determined when the snapshot was taken.
    Unknown,
}

impl OverflowState {
    /// Fixed snake_case tag naming this state in canonical JSON output.
    pub fn tag(self) -> &'static str {
        match self {
            Self::None => "none",
            Self::Split => "split",
            Self::DeferredToNextPage => "deferred_to_next_page",
            Self::Clipped => "clipped",
            Self::Unknown => "unknown",
        }
    }
}

/// A single node of the layout tree IR.
///
/// Every field is public, so tests can assemble synthetic trees with plain
/// struct literals instead of a builder. The order in which fields are set
/// has no influence on serialization: the canonical-JSON serializer always
/// writes object keys alphabetically.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct LayoutNode {
    /// Path-derived, stable identifier (see [`LayoutNodeId`]).
    pub id: LayoutNodeId,
    /// Coarse classification of the node.
    pub kind: NodeKind,
    /// Scripting Object Model (SOM) path or name, when available.
    pub som: Option<String>,
    /// 0-based index of the page the node was placed on, when available.
    pub page_index: Option<u32>,
    /// Layout rectangle in points (1pt = 1/72 inch), when available.
    pub rect: Option<Rect>,
    /// Presence/visibility state of the node.
    pub presence: PresenceIR,
    /// Field-specific kind; only meaningful when `kind == Field`.
    pub field_kind: Option<FieldKindIR>,
    /// 16-hex-char prefix of a hash of the value/text, when available. The
    /// raw value is never serialized, which keeps snapshots small and keeps
    /// PII out of repository-tracked goldens.
    pub value_hash: Option<String>,
    /// Overflow/split state at the time the IR was captured.
    pub overflow: OverflowState,
    /// Cross-reference to a `FormNodeId.0` value, for correlating IR
    /// snapshots with the original FormDOM.
    pub form_node_id: Option<u64>,
    /// Child nodes, in layout order.
    pub children: Vec<LayoutNode>,
}

impl LayoutNode {
    /// Create a node with the given `id` and `kind`; every other field
    /// takes its `Default` value.
    pub fn new(id: LayoutNodeId, kind: NodeKind) -> Self {
        let defaults = Self::default();
        Self { id, kind, ..defaults }
    }

    /// Append `child` to this node's children and return the index it was
    /// stored at.
    ///
    /// The child is stored unchanged: callers are expected to have derived
    /// its id with [`LayoutNodeId::child`] beforehand.
    pub fn push_child(&mut self, child: LayoutNode) -> usize {
        // The new element lands at the current length of the vector.
        let position = self.children.len();
        self.children.push(child);
        position
    }
}

/// The IR document as a whole: a schema version plus the root node.
#[derive(Debug, Clone, PartialEq)]
pub struct LayoutTreeIR {
    /// Schema version this document conforms to. See [`SCHEMA_VERSION`].
    pub schema_version: u32,
    /// Root of the layout tree.
    pub root: LayoutNode,
}

impl Default for LayoutTreeIR {
    /// An empty document: current [`SCHEMA_VERSION`] and a lone `Root` node.
    fn default() -> Self {
        let root = LayoutNode::new(LayoutNodeId::root(), NodeKind::Root);
        Self {
            schema_version: SCHEMA_VERSION,
            root,
        }
    }
}

impl LayoutTreeIR {
    /// Create an empty IR whose only node is a `Root` node.
    pub fn new() -> Self {
        Default::default()
    }

    /// Serialize this IR into a canonical, deterministic JSON string.
    ///
    /// The formatting contract is described in [`canonical_json`].
    pub fn to_canonical_json(&self) -> String {
        let mut rendered = String::new();
        canonical_json::write_tree(&mut rendered, self);
        rendered
    }

    /// Count every node in the tree, the root included.
    pub fn node_count(&self) -> usize {
        // Work-list traversal: each popped node is counted once and its
        // children are queued for a later visit.
        let mut pending = vec![&self.root];
        let mut total = 0usize;
        while let Some(node) = pending.pop() {
            total += 1;
            pending.extend(node.children.iter());
        }
        total
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The root identifier is the empty path.
    #[test]
    fn root_id_is_empty() {
        let root = LayoutNodeId::root();
        assert!(root.as_str().is_empty());
    }

    /// Child identifiers are `/`-joined index paths.
    #[test]
    fn child_id_is_path() {
        let first = LayoutNodeId::root().child(0);
        assert_eq!(first.as_str(), "0");

        let nested = first.child(1);
        assert_eq!(nested.as_str(), "0/1");
    }

    /// A freshly created tree holds just the root and the current schema version.
    #[test]
    fn empty_tree_node_count_is_one() {
        let empty = LayoutTreeIR::new();
        assert_eq!(empty.schema_version, SCHEMA_VERSION);
        assert_eq!(empty.node_count(), 1);
    }

    /// Root -> page area -> {content area, field} counts four nodes.
    #[test]
    fn synthetic_tree_node_count() {
        let mut page = LayoutNode::new(LayoutNodeId::root().child(0), NodeKind::PageArea);
        for (slot, kind) in [NodeKind::ContentArea, NodeKind::Field].iter().enumerate() {
            let leaf = LayoutNode::new(page.id.child(slot), *kind);
            page.push_child(leaf);
        }

        let mut root = LayoutNode::new(LayoutNodeId::root(), NodeKind::Root);
        root.push_child(page);

        let tree = LayoutTreeIR {
            schema_version: SCHEMA_VERSION,
            root,
        };
        assert_eq!(tree.node_count(), 4);
    }

    /// A default node is visible.
    #[test]
    fn presence_default_is_visible() {
        assert_eq!(LayoutNode::default().presence, PresenceIR::Visible);
    }

    /// A default node has no overflow.
    #[test]
    fn overflow_default_is_none() {
        assert_eq!(LayoutNode::default().overflow, OverflowState::None);
    }
}