Skip to main content

kobold_xml/
generate.rs

//! XML GENERATE -- a deterministic serializer of an explicit element tree to XML text.
//!
//! This is a *clean, general* XML serializer. It is independent of GnuCOBOL/libcob -- the output policy is
//! ours (stable, explicit, no namespace magic, no schema, no hidden inference), not a reproduction of any
//! COBOL runtime's exact bytes.
//!
//! ## Determinism
//!
//! For a given tree and [`GenerateOptions`] the output bytes are a pure function of the input: attributes
//! are emitted in tree order (never reordered), and nothing depends on timestamps or locale. That is what
//! makes the output safe for migration diffs and golden-file tests (`EXT.XML.GENERATE.1`).
12
13/// A node in the XML tree: an element subtree or a run of text.
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub enum XmlNode {
16    /// An element (`<name ...>...</name>`).
17    Element(XmlElement),
18    /// A run of character data (escaped on output).
19    Text(String),
20}
21
22/// An XML element: a name, ordered attributes, and ordered children. Groups carry child elements; leaves
23/// carry a single [`XmlNode::Text`] child (or none, for an empty element).
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct XmlElement {
26    /// The element (tag) name. Must be a valid XML `Name`; the COBOL adapter sanitizes names that begin
27    /// with a digit.
28    pub name: String,
29    /// Ordered `(name, value)` attributes. Emitted in this exact order (determinism).
30    pub attributes: Vec<(String, String)>,
31    /// Ordered children.
32    pub children: Vec<XmlNode>,
33}
34
35impl XmlElement {
36    /// A group element with the given name and children.
37    pub fn group(name: impl Into<String>, children: Vec<XmlNode>) -> Self {
38        XmlElement { name: name.into(), attributes: Vec::new(), children }
39    }
40    /// A leaf element `<name>text</name>` (the text is escaped on output).
41    pub fn leaf(name: impl Into<String>, text: impl Into<String>) -> Self {
42        XmlElement { name: name.into(), attributes: Vec::new(), children: vec![XmlNode::Text(text.into())] }
43    }
44    /// An empty element `<name/>`.
45    pub fn empty(name: impl Into<String>) -> Self {
46        XmlElement { name: name.into(), attributes: Vec::new(), children: Vec::new() }
47    }
48    /// Append an attribute (returns self for chaining).
49    pub fn with_attr(mut self, name: impl Into<String>, value: impl Into<String>) -> Self {
50        self.attributes.push((name.into(), value.into()));
51        self
52    }
53}
54
/// Serialization options.
///
/// The [`Default`] value is the compact form with no XML declaration (`xml_declaration: false`,
/// `indent: None`). `Default` is derived because those are exactly the field types' own defaults.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GenerateOptions {
    /// Emit the `<?xml version="1.0" encoding="UTF-8"?>` declaration as the first line.
    pub xml_declaration: bool,
    /// Pretty-print with `indent`-space indentation + newlines. `None` = compact (no whitespace between
    /// tags) -- the safest form for byte-stable diffs.
    pub indent: Option<usize>,
}
70
/// `EXT.XML.ESCAPE.1` -- escape a run of element character data and append it to `out`.
///
/// Replacements: `&`->`&amp;`, `<`->`&lt;`, `>`->`&gt;`, and CR->`&#13;`. Every other character is
/// copied unchanged. Nothing already in `out` is touched.
///
/// * `>` is escaped too, though only required after `]]`, for a conservative, unambiguous output.
/// * A literal carriage return would be folded into a line feed by an XML parser's end-of-line
///   normalization, so it is written as a character reference to keep the value intact -- the same
///   treatment [`escape_attr`] gives CR in attribute values.
///
/// Characters that XML 1.0 cannot represent at all (e.g. NUL) are not filtered here.
pub fn escape_text(s: &str, out: &mut String) {
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            // Without this, "\r" and "\r\n" would both read back as "\n".
            '\r' => out.push_str("&#13;"),
            _ => out.push(c),
        }
    }
}
83
/// `EXT.XML.ESCAPE.1` -- escape a value destined for a double-quoted attribute and append it to `out`.
///
/// On top of the markup characters (`&`, `<`, `>`), the quote `"` becomes `&quot;` and the whitespace
/// controls TAB/LF/CR become `&#9;`/`&#10;`/`&#13;`, so the attribute reads back with its exact value.
/// All other characters are copied through untouched.
pub fn escape_attr(s: &str, out: &mut String) {
    // Copy untouched runs in one go; `copied_up_to` is the byte offset just past the last character
    // that was already written.
    let mut copied_up_to = 0;
    for (at, ch) in s.char_indices() {
        let replacement = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\r' => "&#13;",
            _ => continue,
        };
        out.push_str(&s[copied_up_to..at]);
        out.push_str(replacement);
        copied_up_to = at + ch.len_utf8();
    }
    out.push_str(&s[copied_up_to..]);
}
100
101fn write_element(el: &XmlElement, opts: &GenerateOptions, depth: usize, out: &mut String) {
102    let pretty = opts.indent.is_some();
103    let pad = |n: usize, out: &mut String| {
104        if let Some(w) = opts.indent {
105            out.push_str(&" ".repeat(w * n));
106        }
107    };
108
109    pad(depth, out);
110    out.push('<');
111    out.push_str(&el.name);
112    for (k, v) in &el.attributes {
113        out.push(' ');
114        out.push_str(k);
115        out.push_str("=\"");
116        escape_attr(v, out);
117        out.push('"');
118    }
119
120    // An element with no children is written self-closed `<name/>`.
121    if el.children.is_empty() {
122        out.push_str("/>");
123        if pretty {
124            out.push('\n');
125        }
126        return;
127    }
128
129    // A single text child -> inline `<name>text</name>` even in pretty mode (no surrounding whitespace,
130    // which would alter the value).
131    if el.children.len() == 1 {
132        if let XmlNode::Text(t) = &el.children[0] {
133            out.push('>');
134            escape_text(t, out);
135            out.push_str("</");
136            out.push_str(&el.name);
137            out.push('>');
138            if pretty {
139                out.push('\n');
140            }
141            return;
142        }
143    }
144
145    out.push('>');
146    if pretty {
147        out.push('\n');
148    }
149    for child in &el.children {
150        match child {
151            XmlNode::Element(c) => write_element(c, opts, depth + 1, out),
152            XmlNode::Text(t) => {
153                pad(depth + 1, out);
154                escape_text(t, out);
155                if pretty {
156                    out.push('\n');
157                }
158            }
159        }
160    }
161    pad(depth, out);
162    out.push_str("</");
163    out.push_str(&el.name);
164    out.push('>');
165    if pretty {
166        out.push('\n');
167    }
168}
169
170/// `EXT.XML.GENERATE.1` -- serialize an element tree to XML text. Deterministic: the output is a pure
171/// function of `root` + `opts` (attributes in tree order, no timestamps/locale). Leaves render inline
172/// (`<n>text</n>`); empty elements self-close (`<n/>`); groups nest (indented when `opts.indent` is set).
173pub fn generate(root: &XmlElement, opts: &GenerateOptions) -> String {
174    let mut out = String::new();
175    if opts.xml_declaration {
176        out.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
177        out.push('\n');
178    }
179    write_element(root, opts, 0, &mut out);
180    out
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Run one of the escapers over `input` into a fresh buffer and return what it wrote.
    fn escaped(escaper: fn(&str, &mut String), input: &str) -> String {
        let mut buf = String::new();
        escaper(input, &mut buf);
        buf
    }

    /// Text and attribute escaping each produce the expected entities.
    #[test]
    fn escape_text_and_attr() {
        assert_eq!(escaped(escape_text, "a<b&c>d"), "a&lt;b&amp;c&gt;d");
        assert_eq!(escaped(escape_attr, "x\"y&z\tw"), "x&quot;y&amp;z&#9;w");
    }

    /// Compact output covers leaves, an attributed group and an empty element, and is repeatable.
    #[test]
    fn generate_compact_deterministic() {
        let attributed_group =
            XmlElement::group("GRP", vec![XmlNode::Element(XmlElement::leaf("X", "hi"))])
                .with_attr("id", "1\"");
        let children: Vec<XmlNode> = vec![
            XmlElement::leaf("NEG", "-42"),
            XmlElement::leaf("SPC", "a<b&c"),
            attributed_group,
            XmlElement::empty("EMPTY"),
        ]
        .into_iter()
        .map(XmlNode::Element)
        .collect();
        let tree = XmlElement::group("G", children);
        let opts = GenerateOptions::default();

        let first = generate(&tree, &opts);
        assert_eq!(
            first,
            "<G><NEG>-42</NEG><SPC>a&lt;b&amp;c</SPC><GRP id=\"1&quot;\"><X>hi</X></GRP><EMPTY/></G>"
        );
        // Determinism: a second run yields the same bytes.
        assert_eq!(first, generate(&tree, &opts));
    }

    /// Pretty mode indents nested elements and the declaration comes first on its own line.
    #[test]
    fn generate_pretty_and_declaration() {
        let leaf = XmlNode::Element(XmlElement::leaf("A", "1"));
        let tree = XmlElement::group("R", vec![leaf]);
        let opts = GenerateOptions { xml_declaration: true, indent: Some(2) };
        let expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<R>\n  <A>1</A>\n</R>\n";
        assert_eq!(generate(&tree, &opts), expected);
    }
}
225}