xot 0.31.2

Full-featured XML tree library for Rust
Documentation
//! XML output method.
//! The main entry point is [`Parameters`], which you can pass into various
//! serialization methods to control the output.
//!
//! See [`Xot::serialize_xml_string`],
//! [`Xot::serialize_xml_string_with_normalizer`], [`Xot::serialize_xml_write`],
//! and [`Xot::serialize_xml_write_with_normalizer`].

// This follows the rules
// <https://www.w3.org/TR/xslt-xquery-serialization/#xml-output> but this API is
// modified to include only those features that make sense for Xot, and use
// enums to make the API more ergonomic where there are multiple interacting
// parameters.
//
// Here is how we diverge from the specification:
//
// * `normalization-form` is not directly supported, but if you enable the `icu`
//   feature you can use [`Xot::serialize_xml_string_with_normalizer`] to control
//   normalization with an ICU normalizer.
// * There is no way to declare the `version` parameter, as only XML 1.0 is
//   permitted at this time.
// * You can only influence the encoding parameter of the XML declaration, and
//   this does not trigger actual encoding; output is always UTF-8 and it's up
//   to you to do any further re-encoding.
// * The `item-separator` parameter is specific to XPath/XSLT sequences and is
//   not supported directly by Xot.
// * The `media-type` property is only meaningful in the context of a larger
//   system and is not supported directly by Xot.
// * `undeclare-prefixes` is only supported by XML 1.1, which Xot does not
//   support at present.

#[cfg(doc)]
use crate::Xot;

use std::io::Write;

use crate::NameId;

use super::Indentation;

/// Parameters for XML generation.
///
/// You can use these parameters with [`Xot::serialize_xml_string`] to control
/// the XML output generated by Xot.
///
/// The struct derives [`Default`], so you can set only the fields you care
/// about and fill in the rest with `..Default::default()`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Parameters {
    /// Pretty-print the XML. The [`Indentation`] value carries the list of
    /// elements for which indentation is suppressed. With `None` (the default)
    /// no indentation is added.
    pub indentation: Option<Indentation>,
    /// Elements that should have their text content be serialized as CDATA
    /// sections.
    pub cdata_section_elements: Vec<NameId>,
    /// The XML declaration, if any.
    pub declaration: Option<Declaration>,
    /// The doctype declaration, if any.
    pub doctype: Option<DocType>,
    /// Whether to leave the `>` character unescaped in text content. By
    /// default this is false, which means that `>` is escaped as `&gt;`. If you
    /// set this to true, `>` is not escaped, except for the special case of
    /// `]]>` outside of CDATA, which is mandated by the XML specification to
    /// always be escaped.
    pub unescaped_gt: bool,
    // TODO: character maps
}

/// Controls the contents of the XML declaration written at the start of the
/// output.
///
/// The version is always `1.0`; the encoding and standalone parts are
/// optional.
///
/// Examples:
///
/// ```xml
/// <?xml version="1.0"?>
/// ```
///
/// ```xml
/// <?xml version="1.0" standalone="yes"?>
/// ```
///
/// ```xml
/// <?xml version="1.0" encoding="UTF-8"?>
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Declaration {
    /// When set, an encoding declaration with exactly this text is added to
    /// the XML declaration. The text is copied as-is and has no influence on
    /// the bytes produced by serialization; those are always UTF-8.
    pub encoding: Option<String>,
    /// When set, a standalone declaration is added to the XML declaration:
    /// `yes` for `true`, `no` for `false`.
    pub standalone: Option<bool>,
    // TODO: if in the future we add XML 1.1 support, we can add the version here.
    // This way without a declaration it is automatically an XML 1.0 document. Potentially
    // also include `undeclare-prefixes` here, as that's only supported in XML 1.1.
}

impl Declaration {
    /// Writes the XML declaration to `w`, terminated by a newline.
    ///
    /// The pseudo-attributes are emitted in the order `version`, `encoding`,
    /// `standalone`; the latter two only when the matching field is set.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `w`.
    pub(crate) fn serialize(&self, w: &mut impl Write) -> Result<(), std::io::Error> {
        w.write_all(b"<?xml version=\"1.0\"")?;

        if let Some(encoding) = self.encoding.as_deref() {
            let parts: [&[u8]; 3] = [b" encoding=\"", encoding.as_bytes(), b"\""];
            for part in parts.iter() {
                w.write_all(part)?;
            }
        }

        if let Some(standalone) = self.standalone {
            let value = if standalone { "yes" } else { "no" };
            let parts: [&[u8]; 3] = [b" standalone=\"", value.as_bytes(), b"\""];
            for part in parts.iter() {
                w.write_all(part)?;
            }
        }

        w.write_all(b"?>\n")
    }
}

/// The doctype declaration.
///
/// Only the external identifiers are stored here; the document type name is
/// passed in separately when the declaration is written.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DocType {
    /// Public identifier and system identifier.
    ///
    /// Example:
    /// ```xml
    /// <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    /// ```
    Public {
        /// The public identifier.
        public: String,
        /// The system identifier.
        system: String,
    },
    /// System identifier only.
    ///
    /// Example:
    /// ```xml
    /// <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    /// ```
    System {
        /// The system identifier.
        system: String,
    },
}

impl DocType {
    /// Writes a `<!DOCTYPE ...>` declaration for the document type `name` to
    /// `w`, terminated by a newline.
    ///
    /// Identifiers are normally wrapped in double quotes. An identifier that
    /// itself contains a double quote (and no apostrophe) is wrapped in
    /// apostrophes instead, so that the literal is not cut short.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `w`.
    pub(crate) fn serialize(&self, name: &str, w: &mut impl Write) -> Result<(), std::io::Error> {
        w.write_all(b"<!DOCTYPE ")?;
        w.write_all(name.as_bytes())?;
        match self {
            DocType::Public { public, system } => {
                w.write_all(b" PUBLIC ")?;
                Self::write_literal(public, w)?;
                w.write_all(b" ")?;
                Self::write_literal(system, w)?;
            }
            DocType::System { system } => {
                w.write_all(b" SYSTEM ")?;
                Self::write_literal(system, w)?;
            }
        }
        w.write_all(b">\n")
    }

    /// Writes `literal` surrounded by a pair of matching quote characters.
    ///
    /// Double quotes are the default delimiter. Apostrophes are used only when
    /// `literal` contains a double quote and no apostrophe. A literal that
    /// contains both kinds of quote cannot be delimited correctly; it is
    /// still written in double quotes, as it was before delimiter selection
    /// was added, rather than failing.
    fn write_literal(literal: &str, w: &mut impl Write) -> Result<(), std::io::Error> {
        let quote: &[u8] = if literal.contains('"') && !literal.contains('\'') {
            b"'"
        } else {
            b"\""
        };
        w.write_all(quote)?;
        w.write_all(literal.as_bytes())?;
        w.write_all(quote)
    }
}

#[cfg(test)]
mod tests {
    use crate::{output::Indentation, Xot};

    use super::*;

    /// Default parameters add no declaration, doctype or whitespace.
    #[test]
    fn test_xml_output_default() {
        let parameters = Parameters::default();
        let mut xot = Xot::new();
        let root = xot.parse("<doc><p>hello</p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(output, r#"<doc><p>hello</p></doc>"#);
    }

    /// Default indentation puts nested elements on their own indented lines.
    #[test]
    fn test_xml_output_indent() {
        let parameters = Parameters {
            indentation: Some(Indentation::default()),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse("<doc><p>hello</p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<doc>
  <p>hello</p>
</doc>
"#
        );
    }

    /// An explicitly empty suppress list indents every nesting level.
    #[test]
    fn test_xml_output_indent_not_suppress() {
        let mut xot = Xot::new();

        let parameters = Parameters {
            indentation: Some(Indentation {
                suppress: Vec::new(),
            }),
            ..Parameters::default()
        };
        let root = xot.parse("<doc><p><k>foo</k></p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<doc>
  <p>
    <k>foo</k>
  </p>
</doc>
"#
        );
    }

    /// Content of an element on the suppress list stays on a single line.
    #[test]
    fn test_xml_output_indent_suppress() {
        let mut xot = Xot::new();
        let p_name = xot.add_name("p");
        let parameters = Parameters {
            indentation: Some(Indentation {
                suppress: vec![p_name],
            }),
            ..Parameters::default()
        };
        let root = xot.parse("<doc><p><k>foo</k></p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<doc>
  <p><k>foo</k></p>
</doc>
"#
        );
    }

    /// A default declaration only states the XML version.
    #[test]
    fn test_xml_output_declaration() {
        let parameters = Parameters {
            declaration: Some(Declaration::default()),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse("<doc/>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<?xml version="1.0"?>
<doc/>"#
        );
    }

    /// `standalone: Some(true)` is rendered as `standalone="yes"`.
    #[test]
    fn test_xml_output_declaration_standalone() {
        let declaration = Declaration {
            standalone: Some(true),
            ..Declaration::default()
        };
        let parameters = Parameters {
            declaration: Some(declaration),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse("<doc/>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<?xml version="1.0" standalone="yes"?>
<doc/>"#
        );
    }

    /// A public doctype carries both the public and the system identifier.
    #[test]
    fn test_xml_output_doctype_public() {
        let doctype = DocType::Public {
            public: "-//W3C//DTD XHTML 1.0 Strict//EN".to_string(),
            system: "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string(),
        };
        let parameters = Parameters {
            doctype: Some(doctype),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse("<doc/>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<!DOCTYPE doc PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<doc/>"#
        );
    }

    /// A system doctype carries only the system identifier.
    #[test]
    fn test_xml_output_doctype_system() {
        let doctype = DocType::System {
            system: "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string(),
        };
        let parameters = Parameters {
            doctype: Some(doctype),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse("<doc/>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<!DOCTYPE doc SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<doc/>"#
        );
    }

    /// The doctype name includes the prefix of a prefixed document element.
    #[test]
    fn test_xml_output_doctype_prefixed_name() {
        let doctype = DocType::System {
            system: "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".to_string(),
        };
        let parameters = Parameters {
            doctype: Some(doctype),
            ..Parameters::default()
        };
        let mut xot = Xot::new();
        let root = xot.parse(r#"<prefix:doc xmlns:prefix="foo"/>"#).unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<!DOCTYPE prefix:doc SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<prefix:doc xmlns:prefix="foo"/>"#
        );
    }

    /// Text inside a listed element is written as a CDATA section.
    #[test]
    fn test_cdata_sections_elements() {
        let mut xot = Xot::new();
        let p_name = xot.add_name("p");
        let parameters = Parameters {
            cdata_section_elements: vec![p_name],
            ..Parameters::default()
        };
        let root = xot.parse("<doc><p>hello</p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(output, r#"<doc><p><![CDATA[hello]]></p></doc>"#);
    }

    /// Each text child of a listed element gets its own CDATA section; text
    /// inside a nested, unlisted element is left alone.
    #[test]
    fn test_cdata_sections_elements_multiple() {
        let mut xot = Xot::new();
        let p_name = xot.add_name("p");
        let parameters = Parameters {
            cdata_section_elements: vec![p_name],
            ..Parameters::default()
        };
        let root = xot.parse("<doc><p>hello<s> </s>world</p></doc>").unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<doc><p><![CDATA[hello]]><s> </s><![CDATA[world]]></p></doc>"#
        );
    }

    /// Text containing `]]>` is split over two CDATA sections so that the
    /// end marker never appears inside one.
    #[test]
    fn test_cdata_sections_elements_with_end_characters() {
        let mut xot = Xot::new();
        let p_name = xot.add_name("p");
        let parameters = Parameters {
            cdata_section_elements: vec![p_name],
            ..Parameters::default()
        };
        let root = xot.parse(r#"<doc><p>hello]]&gt; world</p></doc>"#).unwrap();

        let output = xot.serialize_xml_string(parameters, root).unwrap();
        assert_eq!(
            output,
            r#"<doc><p><![CDATA[hello]]]]><![CDATA[> world]]></p></doc>"#
        );
    }
}