// oxipdf-ir 0.1.0
//
// Intermediate representation types for the oxipdf PDF engine.
// Documentation
//! `StyledTreeBuilder` — incremental construction and structural validation.

use crate::error::InputValidationError;
use crate::node::{ContentVariant, Node, NodeId};
use crate::semantic::SemanticRole;
use crate::style::ResolvedStyle;
use crate::version::IrVersion;

use super::StyledTree;

/// Builder for constructing a `StyledTree` incrementally.
///
/// The first node added becomes the root. Children are attached via
/// [`add_child`](Self::add_child).
///
/// Nodes receive sequential ids in the order they are added. No structural
/// checks run while nodes are being added; all validation happens in
/// [`build`](Self::build), which consumes the builder.
///
/// Style inheritance is off by default and can be switched on with
/// [`with_inheritance`](Self::with_inheritance).
///
/// # Example
///
/// ```
/// use oxipdf_ir::*;
///
/// let mut builder = StyledTreeBuilder::new(IrVersion::new(1, 0));
///
/// let root = builder.add_node(
///     ContentVariant::Container,
///     ResolvedStyle::default(),
///     None,
///     None,
/// );
///
/// let _paragraph = builder.add_child(
///     root,
///     ContentVariant::Text(TextContent::new("Hello, PDF!")),
///     ResolvedStyle::default(),
///     Some(SemanticRole::Paragraph),
///     None,
/// );
///
/// let tree = builder.build().expect("valid tree");
/// assert_eq!(tree.node_count(), 2);
/// ```
pub struct StyledTreeBuilder {
    /// IR version that is handed over to the built `StyledTree`.
    ir_version: IrVersion,
    /// Nodes in insertion order. A node's id is its index in this list, and
    /// index 0 is treated as the root by `build()`.
    nodes: Vec<Node>,
    /// When true, `build()` will propagate inheritable style properties
    /// (font families, font size, font weight, font style, color, line height,
    /// text align, direction, letter spacing, word spacing) from parent to
    /// child when the child's value is at its default.
    inherit_styles: bool,
}

impl StyledTreeBuilder {
    #[must_use]
    pub fn new(ir_version: IrVersion) -> Self {
        Self {
            ir_version,
            nodes: Vec::new(),
            inherit_styles: false,
        }
    }

    /// Create an empty builder with room reserved for `capacity` nodes.
    ///
    /// Only the node storage is pre-sized; the builder still starts with
    /// zero nodes and with style inheritance disabled.
    #[must_use]
    pub fn with_capacity(ir_version: IrVersion, capacity: usize) -> Self {
        let nodes = Vec::with_capacity(capacity);
        Self {
            inherit_styles: false,
            nodes,
            ir_version,
        }
    }

    /// Enable automatic style inheritance during `build()`.
    ///
    /// When enabled, inheritable typography properties (font families,
    /// font size, font weight, font style, color, line height, text align,
    /// direction, letter spacing, word spacing) are propagated from parent
    /// to child nodes when the child retains the default value.
    ///
    /// This is a convenience for consumers that do not resolve inheritance
    /// themselves. If the consumer already resolves all styles, leave this
    /// disabled (the default).
    #[must_use]
    pub fn with_inheritance(mut self, enabled: bool) -> Self {
        self.inherit_styles = enabled;
        self
    }

    /// Add a detached node. The first node becomes the tree root.
    pub fn add_node(
        &mut self,
        content: ContentVariant,
        style: ResolvedStyle,
        semantic_role: Option<SemanticRole>,
        element_id: Option<String>,
    ) -> NodeId {
        let id = NodeId::from_raw(self.nodes.len() as u32);
        self.nodes.push(Node {
            id,
            content,
            style,
            children: Vec::new(),
            semantic_role,
            element_id,
        });
        id
    }

    /// Create a new node and append it to the child list of `parent`.
    ///
    /// Returns the id of the newly created node.
    ///
    /// The node is pushed into the builder first and the parent is looked up
    /// afterwards. Passing the id that the new node is about to receive
    /// therefore does not panic: the node is recorded as its own child,
    /// which `build()` later rejects as a self-reference.
    ///
    /// # Panics
    /// Panics if `parent` is out of bounds once the new node has been
    /// added. In that case the new node has already been stored.
    pub fn add_child(
        &mut self,
        parent: NodeId,
        content: ContentVariant,
        style: ResolvedStyle,
        semantic_role: Option<SemanticRole>,
        element_id: Option<String>,
    ) -> NodeId {
        let new_id = self.add_node(content, style, semantic_role, element_id);
        let parent_index = parent.raw() as usize;
        let parent_node = &mut self.nodes[parent_index];
        parent_node.children.push(new_id);
        new_id
    }

    #[must_use]
    pub fn len(&self) -> usize {
        self.nodes.len()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.nodes.is_empty()
    }

    /// Consume the builder and produce a structurally validated `StyledTree`.
    ///
    /// Validates: non-empty, valid child refs, no self-refs, single parent,
    /// full reachability from root.
    pub fn build(self) -> Result<StyledTree, InputValidationError> {
        if self.nodes.is_empty() {
            return Err(InputValidationError::EmptyTree);
        }

        let node_count = self.nodes.len() as u32;
        let root = NodeId::from_raw(0);
        let mut parent_of: Vec<Option<NodeId>> = vec![None; self.nodes.len()];

        for node in &self.nodes {
            for &child_id in &node.children {
                if child_id.raw() >= node_count {
                    return Err(InputValidationError::InvalidChildReference {
                        parent: node.id,
                        child: child_id,
                        node_count,
                    });
                }
                if child_id == node.id {
                    return Err(InputValidationError::SelfReference { node: node.id });
                }
                let idx = child_id.raw() as usize;
                if let Some(existing_parent) = parent_of[idx] {
                    return Err(InputValidationError::MultipleParents {
                        child: child_id,
                        parent_a: existing_parent,
                        parent_b: node.id,
                    });
                }
                parent_of[idx] = Some(node.id);
            }
        }

        // BFS reachability from root.
        let mut visited = vec![false; self.nodes.len()];
        let mut queue = std::collections::VecDeque::new();
        queue.push_back(root);
        visited[0] = true;
        while let Some(id) = queue.pop_front() {
            for &child in &self.nodes[id.raw() as usize].children {
                let idx = child.raw() as usize;
                if !visited[idx] {
                    visited[idx] = true;
                    queue.push_back(child);
                }
            }
        }

        for (i, &was_visited) in visited.iter().enumerate() {
            if !was_visited {
                return Err(InputValidationError::OrphanNode {
                    node: NodeId::from_raw(i as u32),
                });
            }
        }

        let mut tree = StyledTree {
            ir_version: self.ir_version,
            nodes: self.nodes,
            root,
        };

        if self.inherit_styles {
            tree.apply_style_inheritance();
        }

        Ok(tree)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::TextContent;
    use crate::semantic::SemanticRole;

    /// Building without adding any node must fail with `EmptyTree`.
    #[test]
    fn empty_tree_rejected() {
        let builder = StyledTreeBuilder::new(IrVersion::new(1, 0));
        let err = builder.build().unwrap_err();
        assert!(matches!(err, InputValidationError::EmptyTree));
    }

    /// Reserving capacity must not create any nodes.
    #[test]
    fn builder_with_capacity() {
        let version = IrVersion::new(1, 0);
        let builder = StyledTreeBuilder::with_capacity(version, 100);
        assert_eq!(builder.len(), 0);
        assert!(builder.is_empty());
    }

    /// A second detached node is never linked to the root, so `build()`
    /// must report it as an orphan.
    #[test]
    fn orphan_node_rejected() {
        let mut builder = StyledTreeBuilder::new(IrVersion::new(1, 0));
        let detached_contents = vec![
            ContentVariant::Container,
            ContentVariant::Text(TextContent::new("orphan")),
        ];
        for content in detached_contents {
            builder.add_node(content, ResolvedStyle::default(), None, None);
        }

        let err = builder.build().unwrap_err();
        assert!(matches!(err, InputValidationError::OrphanNode { .. }));
    }

    /// A root -> section -> text chain builds and reports the expected
    /// node count, child counts and depth.
    #[test]
    fn nested_tree() {
        let mut builder = StyledTreeBuilder::new(IrVersion::new(1, 0));

        let root = builder.add_node(
            ContentVariant::Container,
            ResolvedStyle::default(),
            None,
            None,
        );
        let section = builder.add_child(
            root,
            ContentVariant::Container,
            ResolvedStyle::default(),
            Some(SemanticRole::Section),
            None,
        );
        let text = ContentVariant::Text(TextContent::new("Nested"));
        builder.add_child(section, text, ResolvedStyle::default(), None, None);

        let tree = builder.build().unwrap();

        // The text node was the third node added, so its raw id is 2.
        let leaf = NodeId::from_raw(2);
        assert_eq!(tree.node_count(), 3);
        assert_eq!(tree.children(root).len(), 1);
        assert_eq!(tree.children(section).len(), 1);
        assert_eq!(tree.depth(leaf), 2);
    }

    /// A root with a single attached child passes validation.
    #[test]
    fn valid_tree_with_child() {
        let mut builder = StyledTreeBuilder::new(IrVersion::new(1, 0));
        let root = builder.add_node(
            ContentVariant::Container,
            ResolvedStyle::default(),
            None,
            None,
        );
        builder.add_child(
            root,
            ContentVariant::Container,
            ResolvedStyle::default(),
            None,
            None,
        );

        let result = builder.build();
        assert!(result.is_ok());
    }
}