1use std::io::Write;
7
8use crate::node::{NodeRef, XmlContent};
9
/// Configuration for [`XmlPrinter`].
#[derive(Debug, Clone, Default)]
pub struct XmlPrinterOptions {
    /// When `true`, tags are indented by nesting depth and followed by a line
    /// break. Defaults to `false`.
    pub pretty_print: bool,
}
16
/// Streaming serializer that writes a node tree as XML text to `W`.
pub struct XmlPrinter<W: Write> {
    /// Destination for the serialized output.
    writer: W,
    /// Formatting options chosen at construction time.
    options: XmlPrinterOptions,
    /// Current element nesting depth; incremented when a start tag is written
    /// and decremented when the element is ended.
    indent: usize,
    /// Kind of output that was emitted most recently.
    state: PrintState,
    /// Saved `has_content` values of the enclosing elements, pushed when an
    /// element is started and popped when it is ended.
    content_stack: Vec<bool>,
    /// `false` while a start tag has been written without its closing `>`,
    /// i.e. the current element has produced no content yet.
    has_content: bool,
}
29
/// What the printer emitted last; used to decide where line breaks and
/// indentation go.
#[derive(Debug, Clone, Copy, PartialEq)]
enum PrintState {
    /// Nothing has been printed yet (the value a new printer starts with).
    Initial,
    /// The last output was markup: the XML declaration, a start or end tag,
    /// a comment, or a processing instruction.
    AfterTag,
    /// The last output was character data.
    AfterChars,
}
36
37impl<W: Write> XmlPrinter<W> {
38 pub fn new(writer: W) -> Self {
40 Self::with_options(writer, XmlPrinterOptions::default())
41 }
42
43 pub fn with_options(writer: W, options: XmlPrinterOptions) -> Self {
45 XmlPrinter {
46 writer,
47 options,
48 indent: 0,
49 state: PrintState::Initial,
50 content_stack: Vec::new(),
51 has_content: true, }
53 }
54
55 pub fn print(&mut self, root: &NodeRef) -> std::io::Result<()> {
57 self.print_node(root, false)
58 }
59
60 pub fn print_fragment(&mut self, root: &NodeRef) -> std::io::Result<()> {
62 self.print_node(root, true)
63 }
64
65 fn print_node(&mut self, node: &NodeRef, fragment: bool) -> std::io::Result<()> {
66 let borrowed = node.borrow();
67 let content = borrowed.content();
68
69 if !fragment {
70 self.start_document()?;
71 }
72
73 match content {
74 Some(XmlContent::Text(text)) => {
75 let text_str: String = text.text().iter().collect();
76 self.characters(&text_str)?;
77 }
78 Some(XmlContent::Comment(comment)) => {
79 if !self.has_content {
81 self.print_with_nl(">")?;
82 self.has_content = true;
83 }
84
85 let comment_text: String = comment.text().iter().collect();
86 self.print_with_nl(&format!(
87 "{}<!-- {} -->",
88 Self::indent_str(self.indent),
89 comment_text
90 ))?;
91 self.state = PrintState::AfterTag;
92 }
93 Some(XmlContent::ProcessingInstruction(pi)) => {
94 if !self.has_content {
96 self.print_with_nl(">")?;
97 self.has_content = true;
98 }
99
100 if pi.content().is_empty() {
101 self.print_with_nl(&format!(
102 "{}<?{}?>",
103 Self::indent_str(self.indent),
104 pi.target()
105 ))?;
106 } else {
107 self.print_with_nl(&format!(
108 "{}<?{} {}?>",
109 Self::indent_str(self.indent),
110 pi.target(),
111 pi.content()
112 ))?;
113 }
114 self.state = PrintState::AfterTag;
115 }
116 Some(XmlContent::Element(element)) => {
117 let qname = element.qname();
118
119 if qname == "$ROOT$" {
121 for child in borrowed.children() {
122 self.print_node(child, true)?;
123 }
124 } else {
125 self.start_element(qname, element.namespace_decls(), element.attributes())?;
126
127 for child in borrowed.children() {
128 self.print_node(child, true)?;
129 }
130
131 self.end_element(qname)?;
132 }
133 }
134 None => {}
135 }
136
137 if !fragment {
138 self.end_document()?;
139 }
140
141 Ok(())
142 }
143
144 fn start_document(&mut self) -> std::io::Result<()> {
145 self.has_content = true;
146 write!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
147 if self.options.pretty_print {
148 writeln!(self.writer)?;
149 }
150 self.state = PrintState::AfterTag;
151 Ok(())
152 }
153
154 fn end_document(&mut self) -> std::io::Result<()> {
155 if !self.options.pretty_print {
156 writeln!(self.writer)?;
157 }
158 self.writer.flush()
159 }
160
161 fn start_element(
162 &mut self,
163 qname: &str,
164 ns_decls: &std::collections::HashMap<String, String>,
165 attrs: &std::collections::HashMap<String, String>,
166 ) -> std::io::Result<()> {
167 if !self.has_content {
169 self.print_with_nl(">")?;
170 self.has_content = true;
171 }
172
173 if self.state == PrintState::AfterTag && !self.options.pretty_print {
175 writeln!(self.writer)?;
176 }
177
178 let mut tag = String::new();
180 tag.push('<');
181 tag.push_str(qname);
182
183 let mut ns_prefixes: Vec<&String> = ns_decls.keys().collect();
185 ns_prefixes.sort();
186 for prefix in ns_prefixes {
187 let uri = &ns_decls[prefix];
188 tag.push(' ');
189 if prefix.is_empty() {
190 tag.push_str("xmlns");
191 } else {
192 tag.push_str("xmlns:");
193 tag.push_str(prefix);
194 }
195 tag.push_str("=\"");
196 tag.push_str(&to_entities(uri));
197 tag.push('"');
198 }
199
200 let mut attr_names: Vec<&String> = attrs.keys().collect();
202 attr_names.sort();
203 for name in attr_names {
204 let value = &attrs[name];
205 tag.push(' ');
206 tag.push_str(name);
207 tag.push_str("=\"");
208 tag.push_str(&to_entities(value));
209 tag.push('"');
210 }
211
212 if self.options.pretty_print {
214 write!(self.writer, "{}", &Self::indent_str(self.indent))?;
215 }
216 write!(self.writer, "{}", tag)?;
217
218 self.content_stack.push(self.has_content);
220 self.has_content = false; self.indent += 1;
222 self.state = PrintState::AfterTag;
223
224 Ok(())
225 }
226
227 fn end_element(&mut self, qname: &str) -> std::io::Result<()> {
228 self.indent -= 1;
229
230 if !self.has_content {
231 self.print_with_nl(" />")?;
233 } else {
234 let close_tag = format!("</{}>", qname);
236
237 if self.state == PrintState::AfterChars {
238 self.print_with_nl(&close_tag)?;
240 } else {
241 if !self.options.pretty_print {
243 writeln!(self.writer)?;
244 }
245 if self.options.pretty_print {
246 write!(self.writer, "{}", &Self::indent_str(self.indent))?;
247 }
248 self.print_with_nl(&close_tag)?;
249 }
250 }
251
252 self.has_content = self.content_stack.pop().unwrap_or(true);
254 self.state = PrintState::AfterTag;
255
256 Ok(())
257 }
258
259 fn characters(&mut self, text: &str) -> std::io::Result<()> {
260 self.state = PrintState::AfterChars;
261
262 if !self.has_content {
264 write!(self.writer, ">")?;
265 }
266 self.has_content = true;
267
268 if text.is_empty() {
269 return Ok(());
270 }
271
272 let encoded = to_entities(text);
273 write!(self.writer, "{}", encoded)
275 }
276
277 fn print_with_nl(&mut self, s: &str) -> std::io::Result<()> {
278 if self.options.pretty_print {
279 writeln!(self.writer, "{}", s)
280 } else {
281 write!(self.writer, "{}", s)
282 }
283 }
284
285 fn indent_str(level: usize) -> String {
286 " ".repeat(level)
287 }
288}
289
/// Escapes the five characters that have predefined XML entities
/// (`&`, `<`, `>`, `'`, `"`) so that `s` can be written as character data or
/// inside a double-quoted attribute value.
///
/// All other characters are copied unchanged. Input that already contains an
/// entity reference is escaped again (`&amp;` becomes `&amp;amp;`).
fn to_entities(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '\'' => result.push_str("&apos;"),
            '"' => result.push_str("&quot;"),
            _ => result.push(c),
        }
    }
    result
}
305
306pub fn print_to_string(root: &NodeRef) -> std::io::Result<String> {
308 let mut output = Vec::new();
309 {
310 let mut printer = XmlPrinter::new(&mut output);
311 printer.print(root)?;
312 }
313 Ok(String::from_utf8_lossy(&output).to_string())
314}
315
316pub fn print_to_string_pretty(root: &NodeRef) -> std::io::Result<String> {
318 let mut output = Vec::new();
319 {
320 let options = XmlPrinterOptions { pretty_print: true };
321 let mut printer = XmlPrinter::with_options(&mut output, options);
322 printer.print(root)?;
323 }
324 Ok(String::from_utf8_lossy(&output).to_string())
325}
326
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xml::parse_str;

    /// Parses `xml` and prints it back in compact form.
    fn reprint(xml: &str) -> String {
        let root = parse_str(xml).unwrap();
        print_to_string(&root).unwrap()
    }

    /// Asserts that parsing `xml`, printing it, and parsing the result again
    /// yields a tree equal to the first one according to `trees_equal`.
    fn assert_round_trips(xml: &str) {
        let tree1 = parse_str(xml).unwrap();
        let output1 = print_to_string(&tree1).unwrap();
        let tree2 = parse_str(&output1).unwrap();

        assert!(
            trees_equal(&tree1, &tree2),
            "round trip changed the tree; printed form was:\n{}",
            output1
        );
    }

    /// Asserts that printing is a fixed point: re-parsing and re-printing the
    /// printed form of `xml` gives identical text. Returns that printed form.
    fn assert_stable_reprint(xml: &str) -> String {
        let output1 = reprint(xml);
        let output2 = reprint(&output1);
        assert_eq!(output1, output2);
        output1
    }

    #[test]
    fn test_print_simple() {
        let output = reprint(r#"<root>text</root>"#);

        assert!(output.starts_with("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assert!(output.contains("<root>"));
        assert!(output.contains("text"));
        assert!(output.contains("</root>"));
    }

    #[test]
    fn test_print_with_attributes() {
        let output = reprint(r#"<root id="foo">content</root>"#);

        assert!(output.contains(r#"id="foo""#));
        assert!(output.contains("content"));
    }

    #[test]
    fn test_print_empty_element() {
        let output = reprint(r#"<root><empty /></root>"#);

        assert!(output.contains("<empty />"));
    }

    #[test]
    fn test_entity_encoding() {
        // The input must use entity references to be well-formed, and the
        // printer must emit them again rather than the raw characters.
        let output = reprint(r#"<root attr="&amp;&lt;&gt;">&amp;&lt;&gt;</root>"#);

        assert!(output.contains("&amp;"));
        assert!(output.contains("&lt;"));
        assert!(output.contains("&gt;"));
    }

    #[test]
    fn test_print_nested() {
        let output = reprint(r#"<a><b><c>deep</c></b></a>"#);

        for expected in ["<a>", "<b>", "<c>", "deep", "</c>", "</b>", "</a>"] {
            assert!(output.contains(expected), "missing {:?} in {:?}", expected, output);
        }
    }

    #[test]
    fn test_pretty_print() {
        let xml = r#"<root><child>text</child></root>"#;
        let root = parse_str(xml).unwrap();
        let output = print_to_string_pretty(&root).unwrap();

        // The nested element must be indented.
        assert!(output.contains(" <child>"));
    }

    /// Structural equality over element names, attributes, text, and child
    /// order.
    ///
    /// Namespace declarations are not compared. Comment and
    /// processing-instruction nodes hit the catch-all arm and therefore never
    /// compare equal, so this helper is only meaningful for trees made of
    /// elements and text.
    fn trees_equal(a: &NodeRef, b: &NodeRef) -> bool {
        let a_borrowed = a.borrow();
        let b_borrowed = b.borrow();

        match (a_borrowed.content(), b_borrowed.content()) {
            (Some(XmlContent::Element(ea)), Some(XmlContent::Element(eb))) => {
                if ea.qname() != eb.qname() {
                    return false;
                }
                if ea.attributes() != eb.attributes() {
                    return false;
                }
            }
            (Some(XmlContent::Text(ta)), Some(XmlContent::Text(tb))) => {
                let text_a: String = ta.text().iter().collect();
                let text_b: String = tb.text().iter().collect();
                if text_a != text_b {
                    return false;
                }
            }
            (None, None) => {}
            _ => return false,
        }

        if a_borrowed.child_count() != b_borrowed.child_count() {
            return false;
        }

        for (child_a, child_b) in a_borrowed
            .children()
            .iter()
            .zip(b_borrowed.children().iter())
        {
            if !trees_equal(child_a, child_b) {
                return false;
            }
        }

        true
    }

    #[test]
    fn test_round_trip_simple() {
        assert_round_trips(r#"<root>text</root>"#);
    }

    #[test]
    fn test_round_trip_with_attributes() {
        assert_round_trips(
            r#"<root id="foo" class="bar"><child name="test">content</child></root>"#,
        );
    }

    #[test]
    fn test_round_trip_nested() {
        assert_round_trips(r#"<a><b><c><d>deep text</d></c></b></a>"#);
    }

    #[test]
    fn test_round_trip_mixed_content() {
        assert_round_trips(r#"<root>text1<child>inner</child>text2</root>"#);
    }

    #[test]
    fn test_round_trip_empty_elements() {
        assert_round_trips(r#"<root><empty /><also-empty></also-empty></root>"#);
    }

    #[test]
    fn test_round_trip_entities() {
        // Covers all five predefined entities in an attribute value and
        // markup-significant characters in text.
        assert_round_trips(
            r#"<root attr="&amp;&lt;&gt;&apos;&quot;">text with &amp; and &lt;tag&gt;</root>"#,
        );
    }

    #[test]
    fn test_double_round_trip() {
        assert_stable_reprint(
            r#"<doc><section id="s1"><para>First paragraph.</para><para>Second paragraph.</para></section></doc>"#,
        );
    }

    #[test]
    fn test_round_trip_namespace_declarations() {
        let output = assert_stable_reprint(
            r#"<root xmlns="http://example.com" xmlns:ns="http://ns.example.com"><ns:child /></root>"#,
        );

        assert!(output.contains("xmlns="));
        assert!(output.contains("xmlns:ns="));
    }

    #[test]
    fn test_round_trip_processing_instruction() {
        let output = assert_stable_reprint(r#"<root><?target data?></root>"#);

        assert!(output.contains("<?target data?>"));
    }

    #[test]
    fn test_comment_with_adjacent_text() {
        let output = reprint(r#"<root>hello<!-- comment -->world</root>"#);

        assert!(output.contains("hello"));
        assert!(output.contains("comment"));
        assert!(output.contains("world"));

        // Document order must be preserved: text, comment, text.
        let hello_pos = output.find("hello").unwrap();
        let comment_pos = output.find("comment").unwrap();
        let world_pos = output.find("world").unwrap();
        assert!(hello_pos < comment_pos);
        assert!(comment_pos < world_pos);
    }
}