kobold-xml 0.1.0

A clean-room, Rust-native COBOL-data XML layer (real XML GENERATE). Independent of GnuCOBOL/libcob; not a GnuCOBOL parity claim.
Documentation
//! XML GENERATE -- a deterministic serializer of an explicit element tree to XML text.
//!
//! This is a *clean, general* XML serializer. It is independent of GnuCOBOL/libcob -- the output policy is
//! ours (stable, explicit, no namespace magic, no schema, no hidden inference), not a reproduction of any
//! COBOL runtime's exact bytes.
//!
//! ## Determinism
//!
//! For a given tree and [`GenerateOptions`] the output bytes are a pure function of the input -- attributes
//! are emitted in tree order (never reordered), no timestamps, no locale. That is what makes the output
//! safe for migration diffs and golden-file tests (`EXT.XML.GENERATE.1`).

/// A node in the XML tree: an element subtree or a run of text.
///
/// Nodes appear as the ordered `children` of an [`XmlElement`]. When a tree is serialized, `Element`
/// children are written recursively and `Text` children are passed through [`escape_text`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XmlNode {
    /// An element (`<name ...>...</name>`), carrying its own attributes and children.
    Element(XmlElement),
    /// A run of character data. Stored unescaped; `&`, `<` and `>` are escaped on output.
    Text(String),
}

/// An XML element: a name, ordered attributes, and ordered children. Groups carry child elements; leaves
/// carry a single [`XmlNode::Text`] child (or none, for an empty element).
///
/// All fields are public, so a tree can be built with a struct literal as well as with the constructor
/// helpers ([`XmlElement::group`], [`XmlElement::leaf`], [`XmlElement::empty`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlElement {
    /// The element (tag) name. Must be a valid XML `Name`; the COBOL adapter sanitizes names that begin
    /// with a digit. The serializer writes the name verbatim -- it is neither validated nor escaped there.
    pub name: String,
    /// Ordered `(name, value)` attributes. Emitted in this exact order (determinism). Values are escaped
    /// with [`escape_attr`] on output; attribute names are written as-is.
    pub attributes: Vec<(String, String)>,
    /// Ordered children, serialized in this order. An empty list yields a self-closed `<name/>`.
    pub children: Vec<XmlNode>,
}

impl XmlElement {
    /// Build an attribute-less element named `name` that owns `children` (in the given order).
    pub fn group(name: impl Into<String>, children: Vec<XmlNode>) -> Self {
        Self {
            name: name.into(),
            attributes: Vec::new(),
            children,
        }
    }

    /// Build a leaf `<name>text</name>`: an attribute-less element whose only child is a text node.
    /// The text is stored as given and escaped when the tree is serialized.
    pub fn leaf(name: impl Into<String>, text: impl Into<String>) -> Self {
        // Convert the name first, then the text, before handing both to `group`.
        let name: String = name.into();
        let text_node = XmlNode::Text(text.into());
        Self::group(name, vec![text_node])
    }

    /// Build an empty element (no attributes, no children), which serializes as `<name/>`.
    pub fn empty(name: impl Into<String>) -> Self {
        Self::group(name, Vec::new())
    }

    /// Consume the element, append the `(name, value)` attribute after any existing ones, and return
    /// the element so calls can be chained.
    pub fn with_attr(mut self, name: impl Into<String>, value: impl Into<String>) -> Self {
        let attribute = (name.into(), value.into());
        self.attributes.push(attribute);
        self
    }
}

/// Serialization options.
///
/// The [`Default`] value is the most conservative configuration: no XML declaration and compact output
/// (`xml_declaration: false`, `indent: None`). `Default` is derived because those are exactly the field
/// types' own defaults.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GenerateOptions {
    /// Emit the `<?xml version="1.0" encoding="UTF-8"?>` declaration as the first line (it is followed by
    /// a newline in both compact and pretty mode).
    pub xml_declaration: bool,
    /// Pretty-print with `indent`-space indentation + newlines. `None` = compact (no whitespace between
    /// tags) -- the safest form for byte-stable diffs. `Some(0)` still inserts the newlines, just without
    /// any leading spaces.
    pub indent: Option<usize>,
}

/// `EXT.XML.ESCAPE.1` -- append `s` to `out` as element character data, replacing `&` with `&amp;`,
/// `<` with `&lt;` and `>` with `&gt;`. Every other character is copied through unchanged.
///
/// `>` only strictly needs escaping after `]]`, but escaping it everywhere keeps the output conservative
/// and unambiguous. Existing contents of `out` are left in place; the escaped text is appended.
pub fn escape_text(s: &str, out: &mut String) {
    // Byte offset of the first character not yet copied to `out`.
    let mut copied_to = 0;
    for (at, ch) in s.char_indices() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            _ => continue,
        };
        // Flush the untouched run before the special character, then its replacement.
        out.push_str(&s[copied_to..at]);
        out.push_str(entity);
        copied_to = at + ch.len_utf8();
    }
    out.push_str(&s[copied_to..]);
}

/// `EXT.XML.ESCAPE.1` -- append `s` to `out` as the content of a double-quoted attribute value.
///
/// Applies the same replacements as text escaping (`&`, `<`, `>`), plus `"` -> `&quot;` and the
/// whitespace controls TAB/LF/CR as the character references `&#9;`/`&#10;`/`&#13;`, so an attribute
/// round-trips its exact value. Every other character (including `'`) is copied through unchanged.
/// Existing contents of `out` are left in place; the escaped value is appended.
pub fn escape_attr(s: &str, out: &mut String) {
    // Byte offset of the first character not yet copied to `out`.
    let mut pending = 0;
    for (at, ch) in s.char_indices() {
        let replacement = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\r' => "&#13;",
            _ => continue,
        };
        // Flush the untouched run before the special character, then its replacement.
        out.push_str(&s[pending..at]);
        out.push_str(replacement);
        pending = at + ch.len_utf8();
    }
    out.push_str(&s[pending..]);
}

/// Append the serialization of `el` (and, recursively, its subtree) to `out`.
///
/// `depth` is the nesting level of `el`; it only matters in pretty mode (`opts.indent` is `Some`), where
/// each line is indented by `indent * depth` spaces and every written tag or text line ends in `\n`.
/// In compact mode no whitespace is added at all.
///
/// Three shapes are distinguished by the element's children:
/// * none -> self-closed `<name/>`;
/// * exactly one text node -> inline `<name>text</name>`, even in pretty mode, because surrounding
///   whitespace would alter the value;
/// * anything else -> open tag, each child in order, close tag. A text child in this shape gets its own
///   indented line in pretty mode.
fn write_element(el: &XmlElement, opts: &GenerateOptions, depth: usize, out: &mut String) {
    // Line break, emitted only when pretty-printing.
    let newline = |out: &mut String| {
        if opts.indent.is_some() {
            out.push('\n');
        }
    };
    // Leading spaces for nesting `level`, emitted only when pretty-printing.
    let indent_to = |level: usize, out: &mut String| {
        if let Some(width) = opts.indent {
            out.extend(std::iter::repeat(' ').take(width * level));
        }
    };
    // `</name>` for this element.
    let close_tag = |out: &mut String| {
        out.push_str("</");
        out.push_str(&el.name);
        out.push('>');
    };

    // Opening tag up to (but excluding) its terminator: `<name k="v" ...`.
    indent_to(depth, out);
    out.push('<');
    out.push_str(&el.name);
    for (attr_name, attr_value) in &el.attributes {
        out.push(' ');
        out.push_str(attr_name);
        out.push_str("=\"");
        escape_attr(attr_value, out);
        out.push('"');
    }

    match el.children.as_slice() {
        // No children: self-close.
        [] => out.push_str("/>"),
        // Lone text child: keep it inline so no whitespace leaks into the value.
        [XmlNode::Text(text)] => {
            out.push('>');
            escape_text(text, out);
            close_tag(out);
        }
        // General case: children one level deeper, then the closing tag at this level.
        children => {
            out.push('>');
            newline(out);
            for child in children {
                match child {
                    XmlNode::Element(nested) => write_element(nested, opts, depth + 1, out),
                    XmlNode::Text(text) => {
                        indent_to(depth + 1, out);
                        escape_text(text, out);
                        newline(out);
                    }
                }
            }
            indent_to(depth, out);
            close_tag(out);
        }
    }

    // Every shape ends its final line the same way.
    newline(out);
}

/// `EXT.XML.GENERATE.1` -- serialize the element tree rooted at `root` to XML text and return it.
///
/// The result is deterministic: it depends only on `root` and `opts` (attributes stay in tree order; no
/// timestamps or locale are involved). When `opts.xml_declaration` is set, the output starts with the
/// `<?xml version="1.0" encoding="UTF-8"?>` line. Leaves render inline (`<n>text</n>`), empty elements
/// self-close (`<n/>`), and groups nest -- indented when `opts.indent` is set, compact otherwise.
pub fn generate(root: &XmlElement, opts: &GenerateOptions) -> String {
    // Start from the declaration line (always newline-terminated) or from an empty buffer.
    let mut xml = if opts.xml_declaration {
        String::from("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
    } else {
        String::new()
    };
    write_element(root, opts, 0, &mut xml);
    xml
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Run one of the escaping functions against a fresh buffer and return what it wrote.
    fn escaped(escape: fn(&str, &mut String), input: &str) -> String {
        let mut buf = String::new();
        escape(input, &mut buf);
        buf
    }

    /// Text escaping handles `&`, `<`, `>`; attribute escaping additionally handles `"` and TAB.
    #[test]
    fn escape_text_and_attr() {
        assert_eq!(escaped(escape_text, "a<b&c>d"), "a&lt;b&amp;c&gt;d");
        assert_eq!(escaped(escape_attr, "x\"y&z\tw"), "x&quot;y&amp;z&#9;w");
    }

    /// Compact output for a tree mixing leaves, an attributed group and an empty element, and the same
    /// bytes again on a second run.
    #[test]
    fn generate_compact_deterministic() {
        let attributed_group =
            XmlElement::group("GRP", vec![XmlNode::Element(XmlElement::leaf("X", "hi"))])
                .with_attr("id", "1\"");
        let children = vec![
            XmlNode::Element(XmlElement::leaf("NEG", "-42")),
            XmlNode::Element(XmlElement::leaf("SPC", "a<b&c")),
            XmlNode::Element(attributed_group),
            XmlNode::Element(XmlElement::empty("EMPTY")),
        ];
        let tree = XmlElement::group("G", children);
        let opts = GenerateOptions::default();

        let first = generate(&tree, &opts);
        assert_eq!(
            first,
            "<G><NEG>-42</NEG><SPC>a&lt;b&amp;c</SPC><GRP id=\"1&quot;\"><X>hi</X></GRP><EMPTY/></G>"
        );

        // Determinism: a second run over the same inputs yields the same bytes.
        let second = generate(&tree, &opts);
        assert_eq!(first, second);
    }

    /// Pretty mode with a declaration: declaration line, indented leaf, trailing newline.
    #[test]
    fn generate_pretty_and_declaration() {
        let leaf = XmlNode::Element(XmlElement::leaf("A", "1"));
        let tree = XmlElement::group("R", vec![leaf]);
        let opts = GenerateOptions { xml_declaration: true, indent: Some(2) };

        let rendered = generate(&tree, &opts);
        assert_eq!(rendered, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<R>\n  <A>1</A>\n</R>\n");
    }
}