1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
use super::*;

/// Closing delimiter of an XML declaration; `Serializer::serialize` appends it
/// for `Token::XMLDeclEnd`.
pub const XML_DECL_END_AS_STR: &str = "?>";
/// Closing delimiter of a processing instruction (same text as
/// [`XML_DECL_END_AS_STR`]); appended for `Token::PIEnd`.
pub const PI_END_AS_STR: &str = XML_DECL_END_AS_STR;
/// Closing delimiter of a document type declaration; appended for
/// `Token::DoctypeDeclEnd`.
pub const DOCTYPE_DECL_END_AS_STR: &str = ">";
/// Closing delimiter of an element start tag (same text as
/// [`DOCTYPE_DECL_END_AS_STR`]); appended for `Token::ElementSTagEnd`.
pub const ELEMENT_STAG_END_AS_STR: &str = DOCTYPE_DECL_END_AS_STR;

/// Selects how `Serializer::serialize` treats the content of `Token::Text`.
///
/// Each variant maps to one method of the text token; the exact whitespace
/// rules are defined by those methods, not by this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WhitespaceMode {
    /// Emit the result of the text token's `deduplicate_whitespace()`.
    Deduplicate,
    /// Emit the text unchanged (the token's `get_as_str()`).
    Leave,
    /// Emit the result of the text token's `normalize_space()`.
    Normalize,
    /// Emit the result of the text token's `normalize_space_deduplicate_head()`.
    ///
    /// NOTE(review): the variant name contains a doubled "de"
    /// ("Dededuplicate"); it is kept as-is because renaming would break callers.
    NormalizeDededuplicateHead,
    /// Emit the result of the text token's `normalize_space_deduplicate_tail()`.
    ///
    /// NOTE(review): same spelling caveat as `NormalizeDededuplicateHead`.
    NormalizeDededuplicateTail,
}

/// Writes a sequence of `Token`s back out as XML text.
///
/// The three settings are fixed by [`Serializer::new`] and apply to every
/// call to [`Serializer::serialize`].
pub struct Serializer {
    /// When `true`, decimal and hexadecimal character references are written
    /// as the referenced character itself, provided that character is safe to
    /// write literally; otherwise they are written as `&#…;` / `&#x…;`.
    expand_character_references: bool,
    /// When `true`, `Token::Comment` tokens produce no output.
    omit_comments: bool,
    /// Whitespace treatment applied to `Token::Text` tokens.
    whitespace_mode: WhitespaceMode,
}
impl Serializer {
    /// Creates a serializer.
    ///
    /// * `expand_character_references` - write character references as the
    ///   character they denote where that is safe.
    /// * `omit_comments` - drop comments from the output.
    /// * `whitespace_mode` - how text content is processed before being written.
    pub fn new(
        expand_character_references: bool,
        omit_comments: bool,
        whitespace_mode: WhitespaceMode,
    ) -> Serializer {
        Serializer {
            expand_character_references,
            omit_comments,
            whitespace_mode,
        }
    }

    /// Returns `true` when `c` may be written literally in place of a
    /// character reference.
    ///
    /// Markup delimiters (`<`, `&`, `>`, `"`, `'`) are refused because the
    /// serializer does not track whether it is inside an attribute value
    /// (which it always delimits with `"`) or element content. Control
    /// characters and U+2028 are refused because a literal occurrence may be
    /// illegal or may be rewritten by a parser's line-end and attribute-value
    /// normalization, whereas the reference form is preserved.
    fn is_safe_to_expand(c: char) -> bool {
        match c {
            '<' | '&' | '>' | '"' | '\'' | '\u{2028}' => false,
            _ => !c.is_control(),
        }
    }

    /// Concatenates the textual form of every token in `tokens`, in order,
    /// and returns the resulting string.
    ///
    /// Tokens are written independently of one another; no well-formedness
    /// check is made on the sequence. Notable per-token behavior:
    ///
    /// * Comments are skipped when `omit_comments` is set.
    /// * Text is passed through the method selected by `whitespace_mode`.
    /// * Attribute values are always delimited with double quotes: the opening
    ///   quote is written for `Token::AttributeValueStart` and the closing one
    ///   for `Token::AttributeEnd`.
    /// * A namespace declaration is written as ` xmlns`, followed by
    ///   `:prefix` for `Token::NamespacePrefix` (nothing for
    ///   `Token::NamespaceDefault`), followed by `="value"`.
    /// * Character references are written as `&#N;` (decimal) or `&#xN;`
    ///   (uppercase hexadecimal) unless `expand_character_references` is set
    ///   and the character is safe to write literally.
    pub fn serialize(&self, tokens: &[Token]) -> String {
        let mut xml = String::new();

        for token in tokens {
            match token {
                Token::XMLDeclStart => xml.push_str("<?xml"),
                Token::XMLVersion(xml_version) => xml.push_str(match xml_version {
                    XMLVersion::Version1_0 => " version=\"1.0\"",
                    XMLVersion::Version1_1 => " version=\"1.1\"",
                }),
                Token::XMLEncoding(enc_name) => {
                    xml.push_str(&format!(" encoding=\"{}\"", enc_name.get_as_str()))
                }
                Token::XMLStandalone(standalone) => xml.push_str(if *standalone {
                    " standalone=\"yes\""
                } else {
                    " standalone=\"no\""
                }),
                Token::XMLDeclEnd => xml.push_str(XML_DECL_END_AS_STR),
                Token::DoctypeDeclStart => xml.push_str("<!DOCTYPE "),
                Token::DoctypeName(name) => xml.push_str(name.get_as_str()),
                Token::DoctypeDeclEnd => xml.push_str(DOCTYPE_DECL_END_AS_STR),
                Token::Comment(comment) => {
                    if !self.omit_comments {
                        xml.push_str(&format!("<!--{}-->", comment.get_as_str()))
                    }
                }
                Token::PIStart => xml.push_str("<?"),
                Token::PITarget(target) => xml.push_str(target.get_as_str()),
                Token::PIData(data) => xml.push_str(data.get_as_str()),
                Token::PIEnd => xml.push_str(PI_END_AS_STR),
                Token::ElementStart(qname) => match qname.get_prefix_as_str() {
                    Some(prefix) => {
                        xml.push_str(&format!("<{}:{}", prefix, qname.get_local_part_as_str()))
                    }
                    None => xml.push_str(&format!("<{}", qname.get_local_part_as_str())),
                },
                Token::ElementEmptyEnd => xml.push_str("/>"),
                Token::ElementSTagEnd => xml.push_str(ELEMENT_STAG_END_AS_STR),
                Token::ElementEnd(qname) => match qname.get_prefix_as_str() {
                    Some(prefix) => {
                        xml.push_str(&format!("</{}:{}>", prefix, qname.get_local_part_as_str()))
                    }
                    None => xml.push_str(&format!("</{}>", qname.get_local_part_as_str())),
                },
                // The attribute name token carries the leading space, so the
                // start marker itself writes nothing.
                Token::AttributeStart => {}
                Token::AttributeName(qname) => match qname.get_prefix_as_str() {
                    Some(prefix) => {
                        xml.push_str(&format!(" {}:{}", prefix, qname.get_local_part_as_str()))
                    }
                    None => xml.push_str(&format!(" {}", qname.get_local_part_as_str())),
                },
                Token::AttributeValueStart => xml.push_str("=\""),
                Token::AttributeValue(attribute_value) => {
                    xml.push_str(attribute_value.get_as_str())
                }
                // The closing quote is written once, on `AttributeEnd`.
                Token::AttributeValueEnd => {}
                Token::AttributeEnd => xml.push_str("\""),
                Token::NamespaceStart => xml.push_str(" xmlns"),
                Token::NamespaceDefault => {}
                Token::NamespacePrefix(nc_name) => {
                    // A prefixed declaration is `xmlns:prefix`; the separator
                    // is not part of the prefix and must be written here.
                    xml.push(':');
                    xml.push_str(nc_name.get_as_str());
                }
                Token::NamespaceValue(namespace_value) => {
                    xml.push_str(&format!("=\"{}\"", namespace_value.get_as_str()))
                }
                Token::NamespaceEnd => {}
                Token::Text(text) => match self.whitespace_mode {
                    WhitespaceMode::Deduplicate => xml.push_str(&text.deduplicate_whitespace()),
                    WhitespaceMode::Leave => xml.push_str(text.get_as_str()),
                    WhitespaceMode::Normalize => xml.push_str(&text.normalize_space()),
                    WhitespaceMode::NormalizeDededuplicateHead => {
                        xml.push_str(&text.normalize_space_deduplicate_head())
                    }
                    WhitespaceMode::NormalizeDededuplicateTail => {
                        xml.push_str(&text.normalize_space_deduplicate_tail())
                    }
                },
                Token::CDATASection(cdata) => {
                    xml.push_str(&format!("<![CDATA[{}]]>", cdata.get_as_str()))
                }
                Token::EntityRef(name) => xml.push_str(&format!("&{};", name.get_as_str())),
                Token::DecCharRef(dec_char_ref) => {
                    // The character is only looked up when expansion is requested.
                    if self.expand_character_references {
                        let c = dec_char_ref.get_as_char();
                        if Self::is_safe_to_expand(c) {
                            xml.push(c);
                            continue;
                        }
                    }
                    xml.push_str(&format!("&#{};", dec_char_ref.get_as_u32()));
                }
                Token::HexCharRef(hex_char_ref) => {
                    if self.expand_character_references {
                        let c = hex_char_ref.get_as_char();
                        if Self::is_safe_to_expand(c) {
                            xml.push(c);
                            continue;
                        }
                    }
                    // `&#x…;` must carry the code point in hexadecimal; writing
                    // it in decimal would denote a different character.
                    xml.push_str(&format!("&#x{:X};", hex_char_ref.get_as_u32()));
                }
            }
        }

        xml
    }
}