Skip to main content

xml_3dm/xml/
printer.rs

1//! XML printer that outputs node trees.
2//!
3//! This printer outputs XML that matches the Java implementation's format
4//! for byte-for-byte compatibility.
5
6use std::io::Write;
7
8use crate::node::{NodeRef, XmlContent};
9
/// Options for XML printing.
///
/// The [`Default`] value leaves pretty-printing disabled. The type is comparable so callers
/// and tests can check which configuration is in effect.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct XmlPrinterOptions {
    /// Whether to pretty-print with indentation.
    ///
    /// When `true`, the printer writes indentation before tags according to their nesting depth.
    pub pretty_print: bool,
}
16
17/// XML printer that outputs node trees.
18pub struct XmlPrinter<W: Write> {
19    writer: W,
20    options: XmlPrinterOptions,
21    indent: usize,
22    /// State tracking for proper tag closing
23    state: PrintState,
24    /// Stack of "has content" flags for each element level
25    content_stack: Vec<bool>,
26    /// Whether current element has content
27    has_content: bool,
28}
29
/// Kind of output the printer produced most recently.
#[derive(Clone, Copy, Debug, PartialEq)]
enum PrintState {
    /// State of a freshly constructed printer.
    Initial,
    /// The XML declaration, a tag, a comment or a processing instruction was written last.
    AfterTag,
    /// Character data was written last.
    AfterChars,
}
36
37impl<W: Write> XmlPrinter<W> {
38    /// Creates a new XML printer.
39    pub fn new(writer: W) -> Self {
40        Self::with_options(writer, XmlPrinterOptions::default())
41    }
42
43    /// Creates a new XML printer with the given options.
44    pub fn with_options(writer: W, options: XmlPrinterOptions) -> Self {
45        XmlPrinter {
46            writer,
47            options,
48            indent: 0,
49            state: PrintState::Initial,
50            content_stack: Vec::new(),
51            has_content: true, // Start as true (like Java's HAS_CONTENT at document start)
52        }
53    }
54
55    /// Prints a node tree to the output.
56    pub fn print(&mut self, root: &NodeRef) -> std::io::Result<()> {
57        self.print_node(root, false)
58    }
59
60    /// Prints a node tree as a fragment (no XML declaration).
61    pub fn print_fragment(&mut self, root: &NodeRef) -> std::io::Result<()> {
62        self.print_node(root, true)
63    }
64
65    fn print_node(&mut self, node: &NodeRef, fragment: bool) -> std::io::Result<()> {
66        let borrowed = node.borrow();
67        let content = borrowed.content();
68
69        if !fragment {
70            self.start_document()?;
71        }
72
73        match content {
74            Some(XmlContent::Text(text)) => {
75                let text_str: String = text.text().iter().collect();
76                self.characters(&text_str)?;
77            }
78            Some(XmlContent::Comment(comment)) => {
79                // Close previous unclosed tag if needed
80                if !self.has_content {
81                    self.print_with_nl(">")?;
82                    self.has_content = true;
83                }
84
85                let comment_text: String = comment.text().iter().collect();
86                self.print_with_nl(&format!(
87                    "{}<!-- {} -->",
88                    Self::indent_str(self.indent),
89                    comment_text
90                ))?;
91                self.state = PrintState::AfterTag;
92            }
93            Some(XmlContent::ProcessingInstruction(pi)) => {
94                // Close previous unclosed tag if needed
95                if !self.has_content {
96                    self.print_with_nl(">")?;
97                    self.has_content = true;
98                }
99
100                if pi.content().is_empty() {
101                    self.print_with_nl(&format!(
102                        "{}<?{}?>",
103                        Self::indent_str(self.indent),
104                        pi.target()
105                    ))?;
106                } else {
107                    self.print_with_nl(&format!(
108                        "{}<?{} {}?>",
109                        Self::indent_str(self.indent),
110                        pi.target(),
111                        pi.content()
112                    ))?;
113                }
114                self.state = PrintState::AfterTag;
115            }
116            Some(XmlContent::Element(element)) => {
117                let qname = element.qname();
118
119                // Skip the synthetic $ROOT$ element but print its children
120                if qname == "$ROOT$" {
121                    for child in borrowed.children() {
122                        self.print_node(child, true)?;
123                    }
124                } else {
125                    self.start_element(qname, element.namespace_decls(), element.attributes())?;
126
127                    for child in borrowed.children() {
128                        self.print_node(child, true)?;
129                    }
130
131                    self.end_element(qname)?;
132                }
133            }
134            None => {}
135        }
136
137        if !fragment {
138            self.end_document()?;
139        }
140
141        Ok(())
142    }
143
144    fn start_document(&mut self) -> std::io::Result<()> {
145        self.has_content = true;
146        write!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
147        if self.options.pretty_print {
148            writeln!(self.writer)?;
149        }
150        self.state = PrintState::AfterTag;
151        Ok(())
152    }
153
154    fn end_document(&mut self) -> std::io::Result<()> {
155        if !self.options.pretty_print {
156            writeln!(self.writer)?;
157        }
158        self.writer.flush()
159    }
160
161    fn start_element(
162        &mut self,
163        qname: &str,
164        ns_decls: &std::collections::HashMap<String, String>,
165        attrs: &std::collections::HashMap<String, String>,
166    ) -> std::io::Result<()> {
167        // Close previous unclosed tag if needed
168        if !self.has_content {
169            self.print_with_nl(">")?;
170            self.has_content = true;
171        }
172
173        // In non-pretty mode, add newline between tags
174        if self.state == PrintState::AfterTag && !self.options.pretty_print {
175            writeln!(self.writer)?;
176        }
177
178        // Build opening tag
179        let mut tag = String::new();
180        tag.push('<');
181        tag.push_str(qname);
182
183        // Add namespace declarations (sorted for deterministic output)
184        let mut ns_prefixes: Vec<&String> = ns_decls.keys().collect();
185        ns_prefixes.sort();
186        for prefix in ns_prefixes {
187            let uri = &ns_decls[prefix];
188            tag.push(' ');
189            if prefix.is_empty() {
190                tag.push_str("xmlns");
191            } else {
192                tag.push_str("xmlns:");
193                tag.push_str(prefix);
194            }
195            tag.push_str("=\"");
196            tag.push_str(&to_entities(uri));
197            tag.push('"');
198        }
199
200        // Add attributes (sorted for deterministic output)
201        let mut attr_names: Vec<&String> = attrs.keys().collect();
202        attr_names.sort();
203        for name in attr_names {
204            let value = &attrs[name];
205            tag.push(' ');
206            tag.push_str(name);
207            tag.push_str("=\"");
208            tag.push_str(&to_entities(value));
209            tag.push('"');
210        }
211
212        // Print with indentation if pretty printing
213        if self.options.pretty_print {
214            write!(self.writer, "{}", &Self::indent_str(self.indent))?;
215        }
216        write!(self.writer, "{}", tag)?;
217
218        // Push state and increment indent
219        self.content_stack.push(self.has_content);
220        self.has_content = false; // Reset for new element
221        self.indent += 1;
222        self.state = PrintState::AfterTag;
223
224        Ok(())
225    }
226
227    fn end_element(&mut self, qname: &str) -> std::io::Result<()> {
228        self.indent -= 1;
229
230        if !self.has_content {
231            // No content - use self-closing tag
232            self.print_with_nl(" />")?;
233        } else {
234            // Has content - print closing tag
235            let close_tag = format!("</{}>", qname);
236
237            if self.state == PrintState::AfterChars {
238                // Text content - closing tag goes inline (no indent, no preceding newline)
239                self.print_with_nl(&close_tag)?;
240            } else {
241                // Child elements - closing tag on new line with indent
242                if !self.options.pretty_print {
243                    writeln!(self.writer)?;
244                }
245                if self.options.pretty_print {
246                    write!(self.writer, "{}", &Self::indent_str(self.indent))?;
247                }
248                self.print_with_nl(&close_tag)?;
249            }
250        }
251
252        // Pop state
253        self.has_content = self.content_stack.pop().unwrap_or(true);
254        self.state = PrintState::AfterTag;
255
256        Ok(())
257    }
258
259    fn characters(&mut self, text: &str) -> std::io::Result<()> {
260        self.state = PrintState::AfterChars;
261
262        // Close previous unclosed tag if needed
263        if !self.has_content {
264            write!(self.writer, ">")?;
265        }
266        self.has_content = true;
267
268        if text.is_empty() {
269            return Ok(());
270        }
271
272        let encoded = to_entities(text);
273        // Text content should not have newlines added - keep it inline
274        write!(self.writer, "{}", encoded)
275    }
276
277    fn print_with_nl(&mut self, s: &str) -> std::io::Result<()> {
278        if self.options.pretty_print {
279            writeln!(self.writer, "{}", s)
280        } else {
281            write!(self.writer, "{}", s)
282        }
283    }
284
285    fn indent_str(level: usize) -> String {
286        "  ".repeat(level)
287    }
288}
289
/// Escapes the five XML special characters in `s`.
///
/// `&`, `<`, `>`, `'` and `"` become `&amp;`, `&lt;`, `&gt;`, `&apos;` and `&quot;`;
/// every other character is copied unchanged.
fn to_entities(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '\'' => escaped.push_str("&apos;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
            escaped
        })
}
305
306/// Prints a node tree to a string.
307pub fn print_to_string(root: &NodeRef) -> std::io::Result<String> {
308    let mut output = Vec::new();
309    {
310        let mut printer = XmlPrinter::new(&mut output);
311        printer.print(root)?;
312    }
313    Ok(String::from_utf8_lossy(&output).to_string())
314}
315
316/// Prints a node tree to a string with pretty printing.
317pub fn print_to_string_pretty(root: &NodeRef) -> std::io::Result<String> {
318    let mut output = Vec::new();
319    {
320        let options = XmlPrinterOptions { pretty_print: true };
321        let mut printer = XmlPrinter::with_options(&mut output, options);
322        printer.print(root)?;
323    }
324    Ok(String::from_utf8_lossy(&output).to_string())
325}
326
#[cfg(test)]
mod tests {
    //! Unit tests for the printer: output shape checks and parse/print round trips.

    use super::*;
    use crate::xml::parse_str;

    #[test]
    fn test_print_simple() {
        let xml = r#"<root>text</root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string(&root).unwrap();

        // Should have XML declaration and the element
        assert!(output.starts_with("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assert!(output.contains("<root>"));
        assert!(output.contains("text"));
        assert!(output.contains("</root>"));
    }

    #[test]
    fn test_print_with_attributes() {
        let xml = r#"<root id="foo">content</root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string(&root).unwrap();

        assert!(output.contains(r#"id="foo""#));
        assert!(output.contains("content"));
    }

    #[test]
    fn test_print_empty_element() {
        let xml = r#"<root><empty /></root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string(&root).unwrap();

        // Empty element should be self-closing
        assert!(output.contains("<empty />"));
    }

    #[test]
    fn test_entity_encoding() {
        let xml = r#"<root attr="&amp;&lt;&gt;">&amp;&lt;&gt;</root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string(&root).unwrap();

        // Entities should be preserved
        assert!(output.contains("&amp;"));
        assert!(output.contains("&lt;"));
        assert!(output.contains("&gt;"));
    }

    #[test]
    fn test_print_nested() {
        let xml = r#"<a><b><c>deep</c></b></a>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string(&root).unwrap();

        assert!(output.contains("<a>"));
        assert!(output.contains("<b>"));
        assert!(output.contains("<c>"));
        assert!(output.contains("deep"));
        assert!(output.contains("</c>"));
        assert!(output.contains("</b>"));
        assert!(output.contains("</a>"));
    }

    #[test]
    fn test_pretty_print() {
        let xml = r#"<root><child>text</child></root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string_pretty(&root).unwrap();

        // Pretty print should have indentation
        assert!(output.contains(" <child>"));
    }

    /// Helper to compare two trees structurally.
    ///
    /// Nodes match when they are of the same kind and agree on their payload: element name
    /// and attributes, text, comment text, or processing-instruction target and content.
    /// Children are compared pairwise, in order. Namespace declarations are not compared.
    fn trees_equal(a: &NodeRef, b: &NodeRef) -> bool {
        let a_borrowed = a.borrow();
        let b_borrowed = b.borrow();

        // Compare content
        match (a_borrowed.content(), b_borrowed.content()) {
            (Some(XmlContent::Element(ea)), Some(XmlContent::Element(eb))) => {
                if ea.qname() != eb.qname() {
                    return false;
                }
                // Compare attributes
                if ea.attributes() != eb.attributes() {
                    return false;
                }
            }
            (Some(XmlContent::Text(ta)), Some(XmlContent::Text(tb))) => {
                let text_a: String = ta.text().iter().collect();
                let text_b: String = tb.text().iter().collect();
                if text_a != text_b {
                    return false;
                }
            }
            // Without the next two arms, comment and processing-instruction nodes would hit
            // the catch-all below and never compare equal, even to themselves.
            (Some(XmlContent::Comment(ca)), Some(XmlContent::Comment(cb))) => {
                let comment_a: String = ca.text().iter().collect();
                let comment_b: String = cb.text().iter().collect();
                if comment_a != comment_b {
                    return false;
                }
            }
            (
                Some(XmlContent::ProcessingInstruction(pa)),
                Some(XmlContent::ProcessingInstruction(pb)),
            ) => {
                if pa.target() != pb.target() || pa.content() != pb.content() {
                    return false;
                }
            }
            (None, None) => {}
            // Nodes of different kinds never match.
            _ => return false,
        }

        // Compare children count
        if a_borrowed.child_count() != b_borrowed.child_count() {
            return false;
        }

        // Compare children recursively
        for (child_a, child_b) in a_borrowed
            .children()
            .iter()
            .zip(b_borrowed.children().iter())
        {
            if !trees_equal(child_a, child_b) {
                return false;
            }
        }

        true
    }

    #[test]
    fn test_round_trip_simple() {
        let xml = r#"<root>text</root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_round_trip_with_attributes() {
        let xml = r#"<root id="foo" class="bar"><child name="test">content</child></root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_round_trip_nested() {
        let xml = r#"<a><b><c><d>deep text</d></c></b></a>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_round_trip_mixed_content() {
        let xml = r#"<root>text1<child>inner</child>text2</root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_round_trip_empty_elements() {
        let xml = r#"<root><empty /><also-empty></also-empty></root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_round_trip_entities() {
        let xml =
            r#"<root attr="&amp;&lt;&gt;&apos;&quot;">text with &amp; and &lt;tag&gt;</root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(trees_equal(&tree1, &tree2));
    }

    #[test]
    fn test_double_round_trip() {
        // Parse -> Print -> Parse -> Print should produce identical output
        let xml = r#"<doc><section id="s1"><para>First paragraph.</para><para>Second paragraph.</para></section></doc>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();
        let output2 = print_to_string(&tree2).unwrap();

        // The second print should produce identical output
        assert_eq!(output1, output2);
    }

    #[test]
    fn test_round_trip_namespace_declarations() {
        let xml = r#"<root xmlns="http://example.com" xmlns:ns="http://ns.example.com"><ns:child /></root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();

        // Namespace declarations should appear in output
        assert!(output1.contains("xmlns="));
        assert!(output1.contains("xmlns:ns="));

        // Double round-trip should be stable
        let tree2 = parse_str(&output1).unwrap();
        let output2 = print_to_string(&tree2).unwrap();
        assert_eq!(output1, output2);
    }

    #[test]
    fn test_round_trip_processing_instruction() {
        let xml = r#"<root><?target data?></root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();

        assert!(output1.contains("<?target data?>"));

        let tree2 = parse_str(&output1).unwrap();
        let output2 = print_to_string(&tree2).unwrap();
        assert_eq!(output1, output2);
    }

    #[test]
    fn test_comment_with_adjacent_text() {
        // Verify text nodes are correctly flushed before comments
        let xml = r#"<root>hello<!-- comment -->world</root>"#;
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();

        // Text and comment should both be present and in correct order
        assert!(output1.contains("hello"));
        assert!(output1.contains("comment"));
        assert!(output1.contains("world"));

        // Verify ordering: hello before comment, comment before world
        let hello_pos = output1.find("hello").unwrap();
        let comment_pos = output1.find("comment").unwrap();
        let world_pos = output1.find("world").unwrap();
        assert!(hello_pos < comment_pos);
        assert!(comment_pos < world_pos);
    }
}