rulemorph 0.3.1

YAML-based declarative data transformation engine for CSV/JSON to JSON
Documentation
use quick_xml::events::Event;
use quick_xml::reader::NsReader as XmlReader;

use crate::error::{TransformError, TransformErrorKind};
use crate::model::XmlInput;

use super::super::NormalizationOptions;
use super::invalid;
use super::names::{enforce_namespace_rebinding, start_node};
use super::shape::XmlNode;

/// Parses `input` as an XML document and builds the corresponding [`XmlNode`] tree.
///
/// Events are pulled from the reader one at a time. Open elements are kept on a
/// stack (innermost last); when an element closes it is attached to its parent,
/// or becomes the document root when no parent is open.
///
/// # Errors
///
/// Returns an error when:
/// - the reader reports a parse error, or text cannot be unescaped;
/// - a CDATA section is not valid UTF-8;
/// - the number of elements exceeds `options.max_xml_nodes`;
/// - an element would be opened while `options.max_depth` elements are already open;
/// - a DTD or a processing instruction is encountered;
/// - a close tag has no matching open element, the input ends with elements
///   still open, or no root element was found;
/// - any helper (`enforce_namespace_rebinding`, `start_node`, `append_text`,
///   `attach_node`) rejects the event.
pub(super) fn parse_xml_tree(
    input: &str,
    xml: &XmlInput,
    options: &NormalizationOptions,
) -> Result<XmlNode, TransformError> {
    let mut reader = XmlReader::from_str(input);
    reader.trim_text(false);

    let mut open_elements: Vec<XmlNode> = Vec::new();
    let mut document_root: Option<XmlNode> = None;
    let mut seen_elements = 0usize;

    loop {
        let event = reader.read_event().map_err(xml_err)?;
        // `Start` and `Empty` share every check; they differ only in whether
        // the new node stays open or is attached right away.
        let self_closing = matches!(event, Event::Empty(_));

        match event {
            Event::Start(element) | Event::Empty(element) => {
                seen_elements = seen_elements.saturating_add(1);
                enforce_xml_node_count(seen_elements, options)?;
                if open_elements.len() >= options.max_depth {
                    return Err(invalid("input exceeds max_depth"));
                }
                enforce_namespace_rebinding(&element, &reader)?;
                let node = start_node(&element, xml, &reader)?;
                if self_closing {
                    attach_node(node, &mut open_elements, &mut document_root)?;
                } else {
                    open_elements.push(node);
                }
            }
            Event::Text(raw) => {
                let unescaped = raw.unescape().map_err(xml_err)?;
                append_text(&mut open_elements, unescaped.into_owned(), options)?;
            }
            Event::CData(section) => {
                let bytes = section.into_inner().into_owned();
                let text = match String::from_utf8(bytes) {
                    Ok(text) => text,
                    Err(err) => {
                        return Err(TransformError::new(
                            TransformErrorKind::InvalidInput,
                            format!("failed to parse XML CDATA: {}", err),
                        ));
                    }
                };
                append_text(&mut open_elements, text, options)?;
            }
            Event::End(_) => {
                let Some(finished) = open_elements.pop() else {
                    return Err(invalid("XML close tag without matching start tag"));
                };
                attach_node(finished, &mut open_elements, &mut document_root)?;
            }
            // The XML declaration and comments carry no data for the tree.
            Event::Decl(_) | Event::Comment(_) => {}
            Event::DocType(_) => return Err(invalid("XML DTD is not supported")),
            Event::PI(_) => {
                return Err(invalid("XML processing instructions are not supported"));
            }
            Event::Eof => break,
        }
    }

    match (open_elements.is_empty(), document_root) {
        (false, _) => Err(invalid(
            "XML document ended before all elements were closed",
        )),
        (true, Some(root)) => Ok(root),
        (true, None) => Err(invalid("XML document has no root element")),
    }
}

/// Appends character data to the innermost open element.
///
/// `stack` holds the currently open elements, innermost last. When it is
/// empty the text lies outside the root element; such text is ignored if it
/// consists solely of XML whitespace and rejected otherwise.
///
/// # Errors
///
/// - `"XML text outside root element"` when no element is open and `text`
///   contains anything other than XML whitespace.
/// - `"input exceeds max_text_bytes"` when the element's accumulated text
///   plus `text` would exceed `options.max_text_bytes` bytes (or the sum
///   overflows `usize`).
fn append_text(
    stack: &mut [XmlNode],
    text: String,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    let Some(current) = stack.last_mut() else {
        // XML defines whitespace as exactly space, tab, CR and LF. `str::trim`
        // would also strip Unicode whitespace such as U+00A0 or U+3000, which
        // would silently accept stray characters outside the root element.
        let only_xml_whitespace = text
            .bytes()
            .all(|byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'));
        if only_xml_whitespace {
            return Ok(());
        }
        return Err(invalid("XML text outside root element"));
    };
    // `checked_add` treats an overflowing total as exceeding the limit.
    if current
        .text
        .len()
        .checked_add(text.len())
        .is_none_or(|len| len > options.max_text_bytes)
    {
        return Err(invalid("input exceeds max_text_bytes"));
    }
    current.text.push_str(&text);
    Ok(())
}

/// Places a completed `node` into the tree under construction.
///
/// If an element is still open (`stack` is non-empty), `node` is appended to
/// the children of the innermost one. Otherwise `node` becomes the document
/// root.
///
/// # Errors
///
/// Returns `"XML document must have a single root element"` when no element
/// is open and `root` is already set.
fn attach_node(
    node: XmlNode,
    stack: &mut [XmlNode],
    root: &mut Option<XmlNode>,
) -> Result<(), TransformError> {
    match stack.last_mut() {
        Some(enclosing) => {
            enclosing.children.push(node);
            Ok(())
        }
        None if root.is_some() => Err(invalid("XML document must have a single root element")),
        None => {
            *root = Some(node);
            Ok(())
        }
    }
}

/// Checks the running element `count` against `options.max_xml_nodes`.
///
/// # Errors
///
/// Returns `"input exceeds max_xml_nodes"` when `count` is greater than the
/// configured limit; a count equal to the limit is accepted.
fn enforce_xml_node_count(
    count: usize,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    if count <= options.max_xml_nodes {
        Ok(())
    } else {
        Err(invalid("input exceeds max_xml_nodes"))
    }
}

/// Wraps any displayable XML parsing failure in a [`TransformError`] of kind
/// `InvalidInput`, prefixing the message with `failed to parse XML input: `.
fn xml_err(err: impl std::fmt::Display) -> TransformError {
    let message = format!("failed to parse XML input: {}", err);
    TransformError::new(TransformErrorKind::InvalidInput, message)
}