Skip to main content

xdoc/writer/
mod.rs

1//! Deterministic XML serialization.
2//!
3//! Empty elements are serialized as self-closing tags, for example `<Empty/>`.
4//! Pretty serialization only inserts indentation for structural content. Elements
5//! containing text, CDATA, or mixed content are serialized inline to avoid
6//! changing semantically significant whitespace.
7
8use crate::core::{Document, ElementData, ErrorKind, NodeKind, QName, XmlError, XmlResult};
9
/// Encoding name that `WriterConfig::compact` and `WriterConfig::pretty` start with.
const DEFAULT_ENCODING: &str = "UTF-8";
11
/// Settings that control how a document is turned into XML text.
///
/// Values are created with `WriterConfig::compact` or `WriterConfig::pretty`
/// and adjusted with the `with_*` builder methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WriterConfig {
    /// When `true`, structural children are placed on their own indented lines.
    pretty: bool,
    /// When `true`, the output starts with an `<?xml ...?>` declaration.
    include_declaration: bool,
    /// Encoding name written into the XML declaration.
    encoding: String,
    /// Text repeated once per nesting level in pretty output.
    indent: String,
}
20
21impl WriterConfig {
22    pub fn compact() -> Self {
23        Self {
24            pretty: false,
25            include_declaration: false,
26            encoding: DEFAULT_ENCODING.to_owned(),
27            indent: String::new(),
28        }
29    }
30
31    pub fn pretty() -> Self {
32        Self {
33            pretty: true,
34            include_declaration: false,
35            encoding: DEFAULT_ENCODING.to_owned(),
36            indent: "  ".to_owned(),
37        }
38    }
39
40    pub fn with_xml_declaration(mut self, include_declaration: bool) -> Self {
41        self.include_declaration = include_declaration;
42        self
43    }
44
45    pub fn with_encoding(mut self, encoding: impl Into<String>) -> Self {
46        self.encoding = encoding.into();
47        self
48    }
49
50    pub fn with_indent(mut self, indent: impl Into<String>) -> Self {
51        self.indent = indent.into();
52        self
53    }
54
55    pub fn include_declaration(&self) -> bool {
56        self.include_declaration
57    }
58
59    pub fn encoding(&self) -> &str {
60        &self.encoding
61    }
62
63    pub fn indent(&self) -> &str {
64        &self.indent
65    }
66}
67
68impl Default for WriterConfig {
69    fn default() -> Self {
70        Self::compact()
71    }
72}
73
74/// Serializes a document without extra whitespace or XML declaration.
75pub fn to_string_compact(document: &Document) -> XmlResult<String> {
76    to_string_with_config(document, &WriterConfig::compact())
77}
78
79/// Serializes a document using stable indentation.
80pub fn to_string_pretty(document: &Document, config: WriterConfig) -> XmlResult<String> {
81    to_string_with_config(
82        document,
83        &WriterConfig {
84            pretty: true,
85            ..config
86        },
87    )
88}
89
90/// Serializes a document with an explicit writer configuration.
91pub fn to_string_with_config(document: &Document, config: &WriterConfig) -> XmlResult<String> {
92    let root = document.root().ok_or_else(|| {
93        XmlError::new(
94            ErrorKind::InvalidOperation,
95            "cannot serialize a document without a root element",
96        )
97    })?;
98
99    let mut output = String::new();
100    if config.include_declaration {
101        output.push_str("<?xml version=\"1.0\" encoding=\"");
102        output.push_str(&escape_attribute(&config.encoding));
103        output.push_str("\"?>");
104        if config.pretty {
105            output.push('\n');
106        }
107    }
108
109    write_node(document, root, config, 0, &mut output)?;
110    Ok(output)
111}
112
/// Escapes text node content.
///
/// `&`, `<` and `>` are replaced with `&amp;`, `&lt;` and `&gt;`. A carriage
/// return is written as the character reference `&#xD;`: XML parsers turn a
/// literal carriage return into a line feed, so writing it raw would lose the
/// character when the output is parsed again. All other characters are copied
/// unchanged.
pub fn escape_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            // Only a character reference survives end-of-line normalization.
            '\r' => escaped.push_str("&#xD;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
126
/// Escapes attribute values for use inside double quotes.
///
/// `&`, `<`, `>` and `"` are replaced with `&amp;`, `&lt;`, `&gt;` and
/// `&quot;`. Tab, line feed and carriage return are written as the character
/// references `&#x9;`, `&#xA;` and `&#xD;`: XML parsers replace literal
/// whitespace characters in attribute values with spaces, so writing them raw
/// would change the value when the output is parsed again. All other
/// characters are copied unchanged.
pub fn escape_attribute(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            // Only character references survive attribute-value normalization.
            '\t' => escaped.push_str("&#x9;"),
            '\n' => escaped.push_str("&#xA;"),
            '\r' => escaped.push_str("&#xD;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
141
142fn write_node(
143    document: &Document,
144    node_id: crate::core::NodeId,
145    config: &WriterConfig,
146    depth: usize,
147    output: &mut String,
148) -> XmlResult<()> {
149    match document.node(node_id)?.kind() {
150        NodeKind::Element(element) => write_element(document, element, config, depth, output)?,
151        NodeKind::Text(text) => output.push_str(&escape_text(text)),
152        NodeKind::Comment(comment) => write_comment(comment, output)?,
153        NodeKind::CData(cdata) => write_cdata(cdata, output)?,
154        NodeKind::ProcessingInstruction { target, data } => {
155            write_processing_instruction(target, data.as_deref(), output)?
156        }
157    }
158
159    Ok(())
160}
161
162fn write_element(
163    document: &Document,
164    element: &ElementData,
165    config: &WriterConfig,
166    depth: usize,
167    output: &mut String,
168) -> XmlResult<()> {
169    output.push('<');
170    write_qname(element.name(), output);
171    write_namespace_declarations(element, output);
172    write_attributes(element, output);
173
174    if element.children().is_empty() {
175        output.push_str("/>");
176        return Ok(());
177    }
178
179    output.push('>');
180    if config.pretty {
181        if has_textual_content(document, element)? {
182            write_inline_children(document, element, config, depth, output)?;
183        } else {
184            write_pretty_children(document, element, config, depth, output)?;
185        }
186    } else {
187        write_inline_children(document, element, config, depth, output)?;
188    }
189
190    output.push_str("</");
191    write_qname(element.name(), output);
192    output.push('>');
193    Ok(())
194}
195
196fn has_textual_content(document: &Document, element: &ElementData) -> XmlResult<bool> {
197    for child in element.children() {
198        match document.node(*child)?.kind() {
199            NodeKind::Text(_) | NodeKind::CData(_) => return Ok(true),
200            NodeKind::Element(_)
201            | NodeKind::Comment(_)
202            | NodeKind::ProcessingInstruction { .. } => {}
203        }
204    }
205
206    Ok(false)
207}
208
209fn write_inline_children(
210    document: &Document,
211    element: &ElementData,
212    config: &WriterConfig,
213    depth: usize,
214    output: &mut String,
215) -> XmlResult<()> {
216    for child in element.children() {
217        write_node(document, *child, config, depth + 1, output)?;
218    }
219
220    Ok(())
221}
222
223fn write_pretty_children(
224    document: &Document,
225    element: &ElementData,
226    config: &WriterConfig,
227    depth: usize,
228    output: &mut String,
229) -> XmlResult<()> {
230    for child in element.children() {
231        output.push('\n');
232        write_indent(config, depth + 1, output);
233        write_node(document, *child, config, depth + 1, output)?;
234    }
235    output.push('\n');
236    write_indent(config, depth, output);
237    Ok(())
238}
239
240fn write_namespace_declarations(element: &ElementData, output: &mut String) {
241    for declaration in element.namespace_declarations() {
242        output.push(' ');
243        match declaration.prefix() {
244            Some(prefix) => {
245                output.push_str("xmlns:");
246                output.push_str(prefix.as_str());
247            }
248            None => output.push_str("xmlns"),
249        }
250        output.push_str("=\"");
251        output.push_str(&escape_attribute(declaration.uri().as_str()));
252        output.push('"');
253    }
254}
255
256fn write_attributes(element: &ElementData, output: &mut String) {
257    for attribute in element.attributes() {
258        output.push(' ');
259        write_qname(attribute.name(), output);
260        output.push_str("=\"");
261        output.push_str(&escape_attribute(attribute.value()));
262        output.push('"');
263    }
264}
265
266fn write_qname(name: &QName, output: &mut String) {
267    output.push_str(&name.lexical_name());
268}
269
270fn write_comment(comment: &str, output: &mut String) -> XmlResult<()> {
271    if comment.contains("--") {
272        return Err(XmlError::new(
273            ErrorKind::InvalidOperation,
274            "XML comments cannot contain `--`",
275        ));
276    }
277
278    output.push_str("<!--");
279    output.push_str(comment);
280    output.push_str("-->");
281    Ok(())
282}
283
284fn write_cdata(cdata: &str, output: &mut String) -> XmlResult<()> {
285    if cdata.contains("]]>") {
286        return Err(XmlError::new(
287            ErrorKind::InvalidOperation,
288            "CDATA sections cannot contain `]]>`",
289        ));
290    }
291
292    output.push_str("<![CDATA[");
293    output.push_str(cdata);
294    output.push_str("]]>");
295    Ok(())
296}
297
298fn write_processing_instruction(
299    target: &str,
300    data: Option<&str>,
301    output: &mut String,
302) -> XmlResult<()> {
303    if data.is_some_and(|data| data.contains("?>")) {
304        return Err(XmlError::new(
305            ErrorKind::InvalidOperation,
306            "processing instruction data cannot contain `?>`",
307        ));
308    }
309
310    output.push_str("<?");
311    output.push_str(target);
312    if let Some(data) = data {
313        output.push(' ');
314        output.push_str(data);
315    }
316    output.push_str("?>");
317    Ok(())
318}
319
320fn write_indent(config: &WriterConfig, depth: usize, output: &mut String) {
321    for _ in 0..depth {
322        output.push_str(config.indent());
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{Attribute, NamespaceDeclaration, QName};
    use crate::testing::assert_xml_eq;

    // Golden files holding the expected output for each scenario.
    const SIMPLE_GOLDEN: &str = include_str!("../../tests/golden/writer_simple.xml");
    const NAMESPACES_GOLDEN: &str = include_str!("../../tests/golden/writer_namespaces.xml");
    const PRETTY_GOLDEN: &str = include_str!("../../tests/golden/writer_pretty.xml");
    const PRETTY_MIXED_GOLDEN: &str = include_str!("../../tests/golden/writer_pretty_mixed.xml");
    const PRETTY_CDATA_GOLDEN: &str = include_str!("../../tests/golden/writer_pretty_cdata.xml");
    const PRETTY_STRUCTURAL_MISC_GOLDEN: &str =
        include_str!("../../tests/golden/writer_pretty_structural_misc.xml");

    /// Builds a name from a local part, panicking when it is rejected.
    fn qname(local: &str) -> QName {
        QName::new(local).expect("valid qname")
    }

    /// Builds `<Root><Child>value</Child></Root>`.
    fn simple_document() -> Document {
        let mut doc = Document::new();
        let root_id = doc.add_root_element(qname("Root")).expect("root");
        let child_id = doc.add_element(root_id, qname("Child")).expect("child");
        doc.add_text(child_id, "value").expect("text");
        doc
    }

    /// Serializes with the compact writer, panicking on failure.
    fn compact_xml(document: &Document) -> String {
        to_string_compact(document).expect("serialized XML")
    }

    /// Serializes with the default pretty configuration, panicking on failure.
    fn pretty_xml(document: &Document) -> String {
        to_string_pretty(document, WriterConfig::pretty()).expect("serialized XML")
    }

    #[test]
    fn writer_serializes_compact_xml() {
        let xml = compact_xml(&simple_document());

        assert_eq!(xml, "<Root><Child>value</Child></Root>");
    }

    #[test]
    fn golden_simple_xml_matches_expected_file() {
        let xml = compact_xml(&simple_document());

        assert_xml_eq(SIMPLE_GOLDEN, &xml);
    }

    #[test]
    fn golden_namespaces_xml_matches_expected_file() {
        let mut doc = Document::new();
        let root_name = QName::qualified("doc", "Root", "urn:doc").expect("qname");
        let root_id = doc.add_root_element(root_name).expect("root");

        let default_ns = NamespaceDeclaration::default("urn:default").expect("namespace");
        doc.add_namespace_declaration(root_id, default_ns)
            .expect("namespace declaration");

        let doc_ns = NamespaceDeclaration::prefixed("doc", "urn:doc").expect("namespace");
        doc.add_namespace_declaration(root_id, doc_ns)
            .expect("namespace declaration");

        let id_name = QName::qualified("doc", "id", "urn:doc").expect("qname");
        doc.add_attribute(root_id, Attribute::new(id_name, "123"))
            .expect("attribute");

        let xml = compact_xml(&doc);

        assert_xml_eq(NAMESPACES_GOLDEN, &xml);
    }

    #[test]
    fn golden_pretty_xml_matches_expected_file() {
        let xml = pretty_xml(&simple_document());

        assert_xml_eq(PRETTY_GOLDEN, &xml);
    }

    #[test]
    fn golden_pretty_preserves_mixed_content() {
        let mut doc = Document::new();
        let paragraph = doc.add_root_element(qname("Paragraph")).expect("root");
        doc.add_text(paragraph, "Hello ").expect("text");
        let bold = doc.add_element(paragraph, qname("Bold")).expect("bold");
        doc.add_text(bold, "world").expect("bold text");
        doc.add_text(paragraph, "!").expect("tail text");

        let xml = pretty_xml(&doc);

        assert_xml_eq(PRETTY_MIXED_GOLDEN, &xml);
    }

    #[test]
    fn golden_pretty_preserves_cdata_content() {
        let mut doc = Document::new();
        let script = doc.add_root_element(qname("Script")).expect("root");
        doc.add_cdata(script, "if (a < b) { keep(); }")
            .expect("cdata");

        let xml = pretty_xml(&doc);

        assert_xml_eq(PRETTY_CDATA_GOLDEN, &xml);
    }

    #[test]
    fn golden_pretty_indents_structural_comments_and_processing_instructions() {
        let mut doc = Document::new();
        let root_id = doc.add_root_element(qname("Root")).expect("root");
        doc.add_comment(root_id, "generated").expect("comment");
        doc.add_processing_instruction(root_id, "xml-stylesheet", Some("href=\"style.xsl\""))
            .expect("processing instruction");
        let child_id = doc.add_element(root_id, qname("Child")).expect("child");
        doc.add_text(child_id, "value").expect("text");

        let xml = pretty_xml(&doc);

        assert_xml_eq(PRETTY_STRUCTURAL_MISC_GOLDEN, &xml);
    }

    #[test]
    fn escaping_text_escapes_xml_special_characters() {
        let mut doc = Document::new();
        let root_id = doc.add_root_element(qname("Root")).expect("root");
        doc.add_text(root_id, "a & b < c > d").expect("text");

        let xml = compact_xml(&doc);

        assert_eq!(xml, "<Root>a &amp; b &lt; c &gt; d</Root>");
    }

    #[test]
    fn escaping_attributes_escapes_xml_special_characters() {
        let mut doc = Document::new();
        let root_id = doc.add_root_element(qname("Root")).expect("root");
        let attribute = Attribute::new(qname("value"), "a & b < c > \"d\"");
        doc.add_attribute(root_id, attribute).expect("attribute");

        let xml = compact_xml(&doc);

        assert_eq!(xml, "<Root value=\"a &amp; b &lt; c &gt; &quot;d&quot;\"/>");
    }

    #[test]
    fn writer_config_controls_xml_declaration_and_default_encoding() {
        let doc = simple_document();
        let config = WriterConfig::compact().with_xml_declaration(true);

        let xml = to_string_with_config(&doc, &config).expect("serialized XML");

        assert!(xml.starts_with("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
        assert_eq!(config.encoding(), "UTF-8");
    }

    #[test]
    fn writer_serializes_empty_elements_as_self_closing_tags() {
        let mut doc = Document::new();
        doc.add_root_element(qname("Empty")).expect("root");

        let xml = compact_xml(&doc);

        assert_eq!(xml, "<Empty/>");
    }

    #[test]
    fn writer_keeps_output_deterministic_and_does_not_modify_document() {
        let doc = simple_document();

        let first = compact_xml(&doc);
        let second = compact_xml(&doc);

        assert_eq!(first, second);

        // The root path is still intact after serializing twice.
        let root_id = doc.root().expect("root");
        let root_path = doc.path(root_id).unwrap().to_string();
        assert_eq!(root_path, "/Root");
    }
}