Skip to main content

xml_3dm/xml/
printer.rs

//! XML printer that outputs node trees.
//!
//! The output format matches the Java implementation's so that the two
//! produce byte-for-byte identical documents.
5
6use std::io::Write;
7
8use crate::node::{NodeRef, XmlContent};
9
/// Configuration accepted by [`XmlPrinter`].
#[derive(Clone, Debug, Default)]
pub struct XmlPrinterOptions {
    /// When `true`, nested elements are indented (two spaces per level) and
    /// tags are terminated with a newline. Defaults to `false`.
    pub pretty_print: bool,
}
16
17/// XML printer that outputs node trees.
18pub struct XmlPrinter<W: Write> {
19    writer: W,
20    options: XmlPrinterOptions,
21    indent: usize,
22    /// State tracking for proper tag closing
23    state: PrintState,
24    /// Stack of "has content" flags for each element level
25    content_stack: Vec<bool>,
26    /// Whether current element has content
27    has_content: bool,
28}
29
/// Kind of output emitted most recently; it decides where newlines and
/// closing tags are placed.
#[derive(Clone, Copy, Debug, PartialEq)]
enum PrintState {
    /// Freshly constructed printer; nothing has been written yet.
    Initial,
    /// The XML declaration, a start tag, or an end tag was written last.
    AfterTag,
    /// Character data was written last.
    AfterChars,
}
36
37impl<W: Write> XmlPrinter<W> {
38    /// Creates a new XML printer.
39    pub fn new(writer: W) -> Self {
40        Self::with_options(writer, XmlPrinterOptions::default())
41    }
42
43    /// Creates a new XML printer with the given options.
44    pub fn with_options(writer: W, options: XmlPrinterOptions) -> Self {
45        XmlPrinter {
46            writer,
47            options,
48            indent: 0,
49            state: PrintState::Initial,
50            content_stack: Vec::new(),
51            has_content: true, // Start as true (like Java's HAS_CONTENT at document start)
52        }
53    }
54
55    /// Prints a node tree to the output.
56    pub fn print(&mut self, root: &NodeRef) -> std::io::Result<()> {
57        self.print_node(root, false)
58    }
59
60    /// Prints a node tree as a fragment (no XML declaration).
61    pub fn print_fragment(&mut self, root: &NodeRef) -> std::io::Result<()> {
62        self.print_node(root, true)
63    }
64
65    fn print_node(&mut self, node: &NodeRef, fragment: bool) -> std::io::Result<()> {
66        let borrowed = node.borrow();
67        let content = borrowed.content();
68
69        if !fragment {
70            self.start_document()?;
71        }
72
73        match content {
74            Some(XmlContent::Text(text)) => {
75                let text_str: String = text.text().iter().collect();
76                self.characters(&text_str)?;
77            }
78            Some(XmlContent::Comment(comment)) => {
79                let comment_text: String = comment.text().iter().collect();
80                self.print_with_nl(&format!(
81                    "{}<!-- {} -->",
82                    Self::indent_str(self.indent),
83                    comment_text
84                ))?;
85            }
86            Some(XmlContent::Element(element)) => {
87                let qname = element.qname();
88
89                // Skip the synthetic $ROOT$ element but print its children
90                if qname == "$ROOT$" {
91                    for child in borrowed.children() {
92                        self.print_node(child, true)?;
93                    }
94                } else {
95                    self.start_element(qname, element.attributes())?;
96
97                    for child in borrowed.children() {
98                        self.print_node(child, true)?;
99                    }
100
101                    self.end_element(qname)?;
102                }
103            }
104            None => {}
105        }
106
107        if !fragment {
108            self.end_document()?;
109        }
110
111        Ok(())
112    }
113
114    fn start_document(&mut self) -> std::io::Result<()> {
115        self.has_content = true;
116        write!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
117        if self.options.pretty_print {
118            writeln!(self.writer)?;
119        }
120        self.state = PrintState::AfterTag;
121        Ok(())
122    }
123
124    fn end_document(&mut self) -> std::io::Result<()> {
125        if !self.options.pretty_print {
126            writeln!(self.writer)?;
127        }
128        self.writer.flush()
129    }
130
131    fn start_element(
132        &mut self,
133        qname: &str,
134        attrs: &std::collections::HashMap<String, String>,
135    ) -> std::io::Result<()> {
136        // Close previous unclosed tag if needed
137        if !self.has_content {
138            self.print_with_nl(">")?;
139            self.has_content = true;
140        }
141
142        // In non-pretty mode, add newline between tags
143        if self.state == PrintState::AfterTag && !self.options.pretty_print {
144            writeln!(self.writer)?;
145        }
146
147        // Build opening tag
148        let mut tag = String::new();
149        tag.push('<');
150        tag.push_str(qname);
151
152        // Add attributes (sorted for deterministic output)
153        let mut attr_names: Vec<&String> = attrs.keys().collect();
154        attr_names.sort();
155        for name in attr_names {
156            let value = &attrs[name];
157            tag.push(' ');
158            tag.push_str(name);
159            tag.push_str("=\"");
160            tag.push_str(&to_entities(value));
161            tag.push('"');
162        }
163
164        // Print with indentation if pretty printing
165        if self.options.pretty_print {
166            write!(self.writer, "{}", &Self::indent_str(self.indent))?;
167        }
168        write!(self.writer, "{}", tag)?;
169
170        // Push state and increment indent
171        self.content_stack.push(self.has_content);
172        self.has_content = false; // Reset for new element
173        self.indent += 1;
174        self.state = PrintState::AfterTag;
175
176        Ok(())
177    }
178
179    fn end_element(&mut self, qname: &str) -> std::io::Result<()> {
180        self.indent -= 1;
181
182        if !self.has_content {
183            // No content - use self-closing tag
184            self.print_with_nl(" />")?;
185        } else {
186            // Has content - print closing tag
187            let close_tag = format!("</{}>", qname);
188
189            if self.state == PrintState::AfterChars {
190                // Text content - closing tag goes inline (no indent, no preceding newline)
191                self.print_with_nl(&close_tag)?;
192            } else {
193                // Child elements - closing tag on new line with indent
194                if !self.options.pretty_print {
195                    writeln!(self.writer)?;
196                }
197                if self.options.pretty_print {
198                    write!(self.writer, "{}", &Self::indent_str(self.indent))?;
199                }
200                self.print_with_nl(&close_tag)?;
201            }
202        }
203
204        // Pop state
205        self.has_content = self.content_stack.pop().unwrap_or(true);
206        self.state = PrintState::AfterTag;
207
208        Ok(())
209    }
210
211    fn characters(&mut self, text: &str) -> std::io::Result<()> {
212        self.state = PrintState::AfterChars;
213
214        // Close previous unclosed tag if needed
215        if !self.has_content {
216            write!(self.writer, ">")?;
217        }
218        self.has_content = true;
219
220        if text.is_empty() {
221            return Ok(());
222        }
223
224        let encoded = to_entities(text);
225        // Text content should not have newlines added - keep it inline
226        write!(self.writer, "{}", encoded)
227    }
228
229    fn print_with_nl(&mut self, s: &str) -> std::io::Result<()> {
230        if self.options.pretty_print {
231            writeln!(self.writer, "{}", s)
232        } else {
233            write!(self.writer, "{}", s)
234        }
235    }
236
237    fn indent_str(level: usize) -> String {
238        "  ".repeat(level)
239    }
240}
241
/// Escapes the five XML-reserved characters (`&`, `<`, `>`, `'`, `"`) as
/// their predefined entities; every other character is copied unchanged.
fn to_entities(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '\'' => "&apos;",
            '"' => "&quot;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
257
258/// Prints a node tree to a string.
259pub fn print_to_string(root: &NodeRef) -> std::io::Result<String> {
260    let mut output = Vec::new();
261    {
262        let mut printer = XmlPrinter::new(&mut output);
263        printer.print(root)?;
264    }
265    Ok(String::from_utf8_lossy(&output).to_string())
266}
267
268/// Prints a node tree to a string with pretty printing.
269pub fn print_to_string_pretty(root: &NodeRef) -> std::io::Result<String> {
270    let mut output = Vec::new();
271    {
272        let options = XmlPrinterOptions { pretty_print: true };
273        let mut printer = XmlPrinter::with_options(&mut output, options);
274        printer.print(root)?;
275    }
276    Ok(String::from_utf8_lossy(&output).to_string())
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xml::parse_str;

    /// Parses `xml` and prints it back in the default (non-pretty) mode.
    fn reprint(xml: &str) -> String {
        let tree = parse_str(xml).unwrap();
        print_to_string(&tree).unwrap()
    }

    /// Asserts that parse -> print -> parse yields a structurally equal tree.
    fn assert_round_trip(xml: &str) {
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    /// Structural comparison: element names and attributes, text content,
    /// and children in order. Any other pairing of content kinds is unequal.
    fn trees_equal(a: &NodeRef, b: &NodeRef) -> bool {
        let left = a.borrow();
        let right = b.borrow();

        let same_content = match (left.content(), right.content()) {
            (Some(XmlContent::Element(l)), Some(XmlContent::Element(r))) => {
                l.qname() == r.qname() && l.attributes() == r.attributes()
            }
            (Some(XmlContent::Text(l)), Some(XmlContent::Text(r))) => {
                let l_text: String = l.text().iter().collect();
                let r_text: String = r.text().iter().collect();
                l_text == r_text
            }
            (None, None) => true,
            _ => false,
        };

        same_content
            && left.child_count() == right.child_count()
            && left
                .children()
                .iter()
                .zip(right.children().iter())
                .all(|(l, r)| trees_equal(l, r))
    }

    #[test]
    fn test_print_simple() {
        let output = reprint(r#"<root>text</root>"#);

        // The XML declaration comes first, followed by the element.
        assert!(output.starts_with("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assert!(output.contains("<root>"));
        assert!(output.contains("text"));
        assert!(output.contains("</root>"));
    }

    #[test]
    fn test_print_with_attributes() {
        let output = reprint(r#"<root id="foo">content</root>"#);

        assert!(output.contains(r#"id="foo""#));
        assert!(output.contains("content"));
    }

    #[test]
    fn test_print_empty_element() {
        let output = reprint(r#"<root><empty /></root>"#);

        // An element without content is written self-closing.
        assert!(output.contains("<empty />"));
    }

    #[test]
    fn test_entity_encoding() {
        let output = reprint(r#"<root attr="&amp;&lt;&gt;">&amp;&lt;&gt;</root>"#);

        // Entities must survive the parse/print cycle.
        assert!(output.contains("&amp;"));
        assert!(output.contains("&lt;"));
        assert!(output.contains("&gt;"));
    }

    #[test]
    fn test_print_nested() {
        let output = reprint(r#"<a><b><c>deep</c></b></a>"#);

        assert!(output.contains("<a>"));
        assert!(output.contains("<b>"));
        assert!(output.contains("<c>"));
        assert!(output.contains("deep"));
        assert!(output.contains("</c>"));
        assert!(output.contains("</b>"));
        assert!(output.contains("</a>"));
    }

    #[test]
    fn test_pretty_print() {
        let root = parse_str(r#"<root><child>text</child></root>"#).unwrap();
        let output = print_to_string_pretty(&root).unwrap();

        // Pretty printing indents the nested element.
        assert!(output.contains(" <child>"));
    }

    #[test]
    fn test_round_trip_simple() {
        assert_round_trip(r#"<root>text</root>"#);
    }

    #[test]
    fn test_round_trip_with_attributes() {
        assert_round_trip(
            r#"<root id="foo" class="bar"><child name="test">content</child></root>"#,
        );
    }

    #[test]
    fn test_round_trip_nested() {
        assert_round_trip(r#"<a><b><c><d>deep text</d></c></b></a>"#);
    }

    #[test]
    fn test_round_trip_mixed_content() {
        assert_round_trip(r#"<root>text1<child>inner</child>text2</root>"#);
    }

    #[test]
    fn test_round_trip_empty_elements() {
        assert_round_trip(r#"<root><empty /><also-empty></also-empty></root>"#);
    }

    #[test]
    fn test_round_trip_entities() {
        assert_round_trip(
            r#"<root attr="&amp;&lt;&gt;&apos;&quot;">text with &amp; and &lt;tag&gt;</root>"#,
        );
    }

    #[test]
    fn test_double_round_trip() {
        // Parse -> print -> parse -> print: both printed documents must match.
        let xml = r#"<doc><section id="s1"><para>First paragraph.</para><para>Second paragraph.</para></section></doc>"#;
        let output1 = reprint(xml);
        let output2 = reprint(&output1);

        assert_eq!(output1, output2);
    }
}