xml_3dm/xml/
printer.rs

1//! XML printer that outputs node trees.
2//!
3//! This printer outputs XML that matches the Java implementation's format
4//! for byte-for-byte compatibility.
5
6use std::io::Write;
7
8use crate::node::{NodeRef, XmlContent};
9
/// Configuration accepted by [`XmlPrinter::with_options`].
///
/// The derived `Default` yields compact (non-pretty) output.
#[derive(Clone, Debug, Default)]
pub struct XmlPrinterOptions {
    /// When `true`, the printer indents tags and ends each printed piece
    /// with a newline; when `false`, output is compact.
    pub pretty_print: bool,
}
16
17/// XML printer that outputs node trees.
18pub struct XmlPrinter<W: Write> {
19    writer: W,
20    options: XmlPrinterOptions,
21    indent: usize,
22    /// State tracking for proper tag closing
23    state: PrintState,
24    /// Stack of "has content" flags for each element level
25    content_stack: Vec<bool>,
26    /// Whether current element has content
27    has_content: bool,
28}
29
/// Records what kind of output the printer produced most recently.
#[derive(Clone, Copy, Debug, PartialEq)]
enum PrintState {
    /// Nothing has been printed yet (value of a freshly built printer).
    Initial,
    /// The last thing written was the XML declaration or an element tag.
    AfterTag,
    /// The last thing written was character data.
    AfterChars,
}
36
37impl<W: Write> XmlPrinter<W> {
38    /// Creates a new XML printer.
39    pub fn new(writer: W) -> Self {
40        Self::with_options(writer, XmlPrinterOptions::default())
41    }
42
43    /// Creates a new XML printer with the given options.
44    pub fn with_options(writer: W, options: XmlPrinterOptions) -> Self {
45        XmlPrinter {
46            writer,
47            options,
48            indent: 0,
49            state: PrintState::Initial,
50            content_stack: Vec::new(),
51            has_content: true, // Start as true (like Java's HAS_CONTENT at document start)
52        }
53    }
54
55    /// Prints a node tree to the output.
56    pub fn print(&mut self, root: &NodeRef) -> std::io::Result<()> {
57        self.print_node(root, false)
58    }
59
60    /// Prints a node tree as a fragment (no XML declaration).
61    pub fn print_fragment(&mut self, root: &NodeRef) -> std::io::Result<()> {
62        self.print_node(root, true)
63    }
64
65    fn print_node(&mut self, node: &NodeRef, fragment: bool) -> std::io::Result<()> {
66        let borrowed = node.borrow();
67        let content = borrowed.content();
68
69        if !fragment {
70            self.start_document()?;
71        }
72
73        match content {
74            Some(XmlContent::Text(text)) => {
75                let text_str: String = text.text().iter().collect();
76                self.characters(&text_str)?;
77            }
78            Some(XmlContent::Comment(comment)) => {
79                let comment_text: String = comment.text().iter().collect();
80                self.print_with_nl(&format!(
81                    "{}<!-- {} -->",
82                    Self::indent_str(self.indent),
83                    comment_text
84                ))?;
85            }
86            Some(XmlContent::Element(element)) => {
87                let qname = element.qname();
88
89                // Skip the synthetic $ROOT$ element but print its children
90                if qname == "$ROOT$" {
91                    for child in borrowed.children() {
92                        self.print_node(child, true)?;
93                    }
94                } else {
95                    self.start_element(qname, element.attributes())?;
96
97                    for child in borrowed.children() {
98                        self.print_node(child, true)?;
99                    }
100
101                    self.end_element(qname)?;
102                }
103            }
104            None => {}
105        }
106
107        if !fragment {
108            self.end_document()?;
109        }
110
111        Ok(())
112    }
113
114    fn start_document(&mut self) -> std::io::Result<()> {
115        self.has_content = true;
116        write!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
117        if self.options.pretty_print {
118            writeln!(self.writer)?;
119        }
120        self.state = PrintState::AfterTag;
121        Ok(())
122    }
123
124    fn end_document(&mut self) -> std::io::Result<()> {
125        if !self.options.pretty_print {
126            writeln!(self.writer)?;
127        }
128        self.writer.flush()
129    }
130
131    fn start_element(
132        &mut self,
133        qname: &str,
134        attrs: &std::collections::HashMap<String, String>,
135    ) -> std::io::Result<()> {
136        // Close previous unclosed tag if needed
137        if !self.has_content {
138            self.print_with_nl(">")?;
139            self.has_content = true;
140        }
141
142        // In non-pretty mode, add newline between tags
143        if self.state == PrintState::AfterTag && !self.options.pretty_print {
144            writeln!(self.writer)?;
145        }
146
147        // Build opening tag
148        let mut tag = String::new();
149        tag.push('<');
150        tag.push_str(qname);
151
152        // Add attributes (sorted for deterministic output)
153        let mut attr_names: Vec<&String> = attrs.keys().collect();
154        attr_names.sort();
155        for name in attr_names {
156            let value = &attrs[name];
157            tag.push(' ');
158            tag.push_str(name);
159            tag.push_str("=\"");
160            tag.push_str(&to_entities(value));
161            tag.push('"');
162        }
163
164        // Print with indentation if pretty printing
165        if self.options.pretty_print {
166            write!(self.writer, "{}", &Self::indent_str(self.indent))?;
167        }
168        write!(self.writer, "{}", tag)?;
169
170        // Push state and increment indent
171        self.content_stack.push(self.has_content);
172        self.has_content = false; // Reset for new element
173        self.indent += 1;
174        self.state = PrintState::AfterTag;
175
176        Ok(())
177    }
178
179    fn end_element(&mut self, qname: &str) -> std::io::Result<()> {
180        self.indent -= 1;
181
182        if !self.has_content {
183            // No content - use self-closing tag
184            self.print_with_nl(" />")?;
185        } else {
186            // Has content - print closing tag
187            if self.state == PrintState::AfterTag && !self.options.pretty_print {
188                writeln!(self.writer)?;
189            }
190
191            let close_tag = format!("</{}>", qname);
192            if self.options.pretty_print {
193                write!(self.writer, "{}", &Self::indent_str(self.indent))?;
194            }
195            self.print_with_nl(&close_tag)?;
196        }
197
198        // Pop state
199        self.has_content = self.content_stack.pop().unwrap_or(true);
200        self.state = PrintState::AfterTag;
201
202        Ok(())
203    }
204
205    fn characters(&mut self, text: &str) -> std::io::Result<()> {
206        self.state = PrintState::AfterChars;
207
208        // Close previous unclosed tag if needed
209        if !self.has_content {
210            self.print_with_nl(">")?;
211        }
212        self.has_content = true;
213
214        if text.is_empty() {
215            return Ok(());
216        }
217
218        let encoded = to_entities(text);
219        self.print_with_nl(&encoded)
220    }
221
222    fn print_with_nl(&mut self, s: &str) -> std::io::Result<()> {
223        if self.options.pretty_print {
224            writeln!(self.writer, "{}", s)
225        } else {
226            write!(self.writer, "{}", s)
227        }
228    }
229
230    fn indent_str(level: usize) -> String {
231        "  ".repeat(level)
232    }
233}
234
/// Escapes the five XML special characters (`&`, `<`, `>`, `'`, `"`) as
/// named entities; every other character is copied through unchanged.
fn to_entities(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '\'' => "&apos;",
            '"' => "&quot;",
            plain => {
                escaped.push(plain);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
250
251/// Prints a node tree to a string.
252pub fn print_to_string(root: &NodeRef) -> std::io::Result<String> {
253    let mut output = Vec::new();
254    {
255        let mut printer = XmlPrinter::new(&mut output);
256        printer.print(root)?;
257    }
258    Ok(String::from_utf8_lossy(&output).to_string())
259}
260
261/// Prints a node tree to a string with pretty printing.
262pub fn print_to_string_pretty(root: &NodeRef) -> std::io::Result<String> {
263    let mut output = Vec::new();
264    {
265        let options = XmlPrinterOptions { pretty_print: true };
266        let mut printer = XmlPrinter::with_options(&mut output, options);
267        printer.print(root)?;
268    }
269    Ok(String::from_utf8_lossy(&output).to_string())
270}
271
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xml::parse_str;

    /// Parses `xml` and prints it back with the compact printer.
    fn reprint(xml: &str) -> String {
        let tree = parse_str(xml).unwrap();
        print_to_string(&tree).unwrap()
    }

    /// Asserts that `output` contains every string in `needles`.
    fn assert_contains_all(output: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                output.contains(needle),
                "expected {:?} in {:?}",
                needle,
                output
            );
        }
    }

    /// Asserts that parse -> print -> parse yields a structurally equal tree.
    fn assert_round_trip(xml: &str) {
        let before = parse_str(xml).unwrap();
        let printed = print_to_string(&before).unwrap();
        let after = parse_str(&printed).unwrap();

        assert!(trees_equal(&before, &after));
    }

    #[test]
    fn test_print_simple() {
        let output = reprint("<root>text</root>");

        // Declaration first, then the element itself.
        assert!(output.starts_with(r#"<?xml version="1.0" encoding="UTF-8"?>"#));
        assert_contains_all(&output, &["<root>", "text", "</root>"]);
    }

    #[test]
    fn test_print_with_attributes() {
        let output = reprint(r#"<root id="foo">content</root>"#);

        assert_contains_all(&output, &[r#"id="foo""#, "content"]);
    }

    #[test]
    fn test_print_empty_element() {
        let output = reprint("<root><empty /></root>");

        // An element without content is written in self-closing form.
        assert!(output.contains("<empty />"));
    }

    #[test]
    fn test_entity_encoding() {
        let output = reprint(r#"<root attr="&amp;&lt;&gt;">&amp;&lt;&gt;</root>"#);

        // Escaped characters must still be escaped in the output.
        assert_contains_all(&output, &["&amp;", "&lt;", "&gt;"]);
    }

    #[test]
    fn test_print_nested() {
        let output = reprint("<a><b><c>deep</c></b></a>");

        assert_contains_all(
            &output,
            &["<a>", "<b>", "<c>", "deep", "</c>", "</b>", "</a>"],
        );
    }

    #[test]
    fn test_pretty_print() {
        let tree = parse_str("<root><child>text</child></root>").unwrap();
        let output = print_to_string_pretty(&tree).unwrap();

        // Nested elements are indented in pretty mode.
        assert!(output.contains(" <child>"));
    }

    /// Structural comparison of two trees: element names and attributes,
    /// text content, and children (recursively). Any other pairing of
    /// content kinds is treated as unequal.
    fn trees_equal(a: &NodeRef, b: &NodeRef) -> bool {
        let lhs = a.borrow();
        let rhs = b.borrow();

        let same_content = match (lhs.content(), rhs.content()) {
            (Some(XmlContent::Element(left)), Some(XmlContent::Element(right))) => {
                left.qname() == right.qname() && left.attributes() == right.attributes()
            }
            (Some(XmlContent::Text(left)), Some(XmlContent::Text(right))) => {
                let left_text: String = left.text().iter().collect();
                let right_text: String = right.text().iter().collect();
                left_text == right_text
            }
            (None, None) => true,
            _ => false,
        };

        same_content
            && lhs.child_count() == rhs.child_count()
            && lhs
                .children()
                .iter()
                .zip(rhs.children().iter())
                .all(|(left, right)| trees_equal(left, right))
    }

    #[test]
    fn test_round_trip_simple() {
        assert_round_trip("<root>text</root>");
    }

    #[test]
    fn test_round_trip_with_attributes() {
        assert_round_trip(
            r#"<root id="foo" class="bar"><child name="test">content</child></root>"#,
        );
    }

    #[test]
    fn test_round_trip_nested() {
        assert_round_trip("<a><b><c><d>deep text</d></c></b></a>");
    }

    #[test]
    fn test_round_trip_mixed_content() {
        assert_round_trip("<root>text1<child>inner</child>text2</root>");
    }

    #[test]
    fn test_round_trip_empty_elements() {
        assert_round_trip("<root><empty /><also-empty></also-empty></root>");
    }

    #[test]
    fn test_round_trip_entities() {
        assert_round_trip(
            r#"<root attr="&amp;&lt;&gt;&apos;&quot;">text with &amp; and &lt;tag&gt;</root>"#,
        );
    }

    #[test]
    fn test_double_round_trip() {
        // Printing must be a fixed point: print(parse(print(parse(xml))))
        // equals print(parse(xml)).
        let xml = r#"<doc><section id="s1"><para>First paragraph.</para><para>Second paragraph.</para></section></doc>"#;
        let first_output = reprint(xml);
        let second_output = reprint(&first_output);

        assert_eq!(first_output, second_output);
    }
}