use xml_tokens::serializer::*;
use xml_tokens::*;
/// Serializer that turns a sequence of `Token`s back into XML text.
///
/// The four flags select which optional parts of the token stream are
/// reproduced in the string returned by `serialize`.
pub struct XHTMLSerializer {
/// When `true`, decimal and hexadecimal character references are written as
/// numeric references; when `false`, the character they denote is written
/// instead. Entity references are written as `&name;` in both cases.
keep_references: bool,
/// When `true`, text tokens are written unchanged; when `false`, their
/// whitespace is normalized depending on the neighbouring tokens.
keep_whitespace: bool,
/// When `true`, comment tokens are written; when `false`, they are omitted.
keep_comments: bool,
/// When `true`, the XML declaration tokens are written; when `false`, they
/// are omitted.
keep_xml_declaration: bool,
}
impl XHTMLSerializer {
pub fn new(
keep_references: bool,
keep_whitespace: bool,
keep_comments: bool,
keep_xml_declaration: bool,
) -> XHTMLSerializer {
XHTMLSerializer {
keep_references,
keep_whitespace,
keep_comments,
keep_xml_declaration,
}
}
pub fn serialize(&self, tokens: &Vec<Token>) -> String {
let mut xml = String::new();
let mut i = 0;
let length = tokens.len();
while i < length {
match &tokens[i] {
Token::XMLDeclStart => {
if self.keep_xml_declaration {
xml.push_str("<?xml");
}
}
Token::XMLVersion(xml_version) => {
if self.keep_xml_declaration {
match xml_version {
XMLVersion::Version1_0 => xml.push_str(" version=\"1.0\""),
XMLVersion::Version1_1 => xml.push_str(" version=\"1.1\""),
}
}
}
Token::XMLEncoding(enc_name) => {
if self.keep_xml_declaration {
xml.push_str(&format!(" encoding=\"{}\"", enc_name.get_as_str()));
}
}
Token::XMLStandalone(standalone) => {
if self.keep_xml_declaration {
if *standalone {
xml.push_str(" standalone=\"yes\"");
} else {
xml.push_str(" standalone=\"no\"");
}
}
}
Token::XMLDeclEnd => {
if self.keep_xml_declaration {
xml.push_str(XML_DECL_END_AS_STR);
}
}
Token::DoctypeDeclStart => xml.push_str("<!DOCTYPE "),
Token::DoctypeName(name) => xml.push_str(name.get_as_str()),
Token::DoctypeDeclEnd => xml.push_str(DOCTYPE_DECL_END_AS_STR),
Token::Comment(comment) => {
if self.keep_comments {
xml.push_str(&format!("<!--{}-->", comment.get_as_str()))
}
}
Token::PIStart => xml.push_str("<?"),
Token::PITarget(target) => xml.push_str(target.get_as_str()),
Token::PIData(data) => xml.push_str(data.get_as_str()),
Token::PIEnd => xml.push_str(PI_END_AS_STR),
Token::ElementStart(qname) => match qname.get_prefix_as_str() {
Some(prefix) => {
xml.push_str(&format!("<{}:{}", prefix, qname.get_local_part_as_str()))
}
None => xml.push_str(&format!("<{}", qname.get_local_part_as_str())),
},
Token::ElementEmptyEnd => xml.push_str("/>"),
Token::ElementSTagEnd => xml.push_str(ELEMENT_STAG_END_AS_STR),
Token::ElementEnd(qname) => match qname.get_prefix_as_str() {
Some(prefix) => {
xml.push_str(&format!("</{}:{}>", prefix, qname.get_local_part_as_str()))
}
None => xml.push_str(&format!("</{}>", qname.get_local_part_as_str())),
},
Token::AttributeStart => {}
Token::AttributeName(qname) => match qname.get_prefix_as_str() {
Some(prefix) => {
xml.push_str(&format!(" {}:{}", prefix, qname.get_local_part_as_str()))
}
None => xml.push_str(&format!(" {}", qname.get_local_part_as_str())),
},
Token::AttributeValueStart => xml.push_str("=\""),
Token::AttributeValue(attribute_value) => {
xml.push_str(attribute_value.get_as_str())
}
Token::AttributeValueEnd => {}
Token::AttributeEnd => xml.push_str("\""),
Token::NamespaceStart => xml.push_str(" xmlns"),
Token::NamespaceDefault => {}
Token::NamespacePrefix(nc_name) => xml.push_str(nc_name.get_as_str()),
Token::NamespaceValue(namespace_value) => {
xml.push_str(&format!("=\"{}\"", namespace_value.get_as_str()))
}
Token::NamespaceEnd => {}
Token::Text(text) => {
if self.keep_whitespace {
xml.push_str(text.get_as_str());
} else {
let allow_head_whitespace: bool;
if i == 0 {
allow_head_whitespace = false;
} else {
allow_head_whitespace = XHTMLSerializer::allow_following_whitespace(
XHTMLSerializer::get_previous_token(tokens, i, self.keep_comments),
);
}
let allow_tail_whitespace: bool;
if i == 0 {
allow_tail_whitespace = false;
} else {
allow_tail_whitespace = XHTMLSerializer::allow_preceeding_whitespace(
XHTMLSerializer::get_next_token(tokens, i, self.keep_comments),
);
}
if allow_head_whitespace && allow_tail_whitespace {
xml.push_str(&text.deduplicate_whitespace());
} else if allow_head_whitespace {
xml.push_str(&text.normalize_space_deduplicate_head());
} else if allow_tail_whitespace {
xml.push_str(&text.normalize_space_deduplicate_tail());
} else {
xml.push_str(&text.normalize_space());
}
}
}
Token::CDATASection(cdata) => {
xml.push_str(&format!("<![CDATA[{}]]>", cdata.get_as_str()))
}
Token::EntityRef(name) => {
if self.keep_references {
xml.push_str(&format!("&{};", name.get_as_str()));
} else {
xml.push_str(&format!("&{};", name.get_as_str()));
}
}
Token::DecCharRef(dec_char_ref) => {
if self.keep_references {
xml.push_str(&format!("&#{};", dec_char_ref.get_as_u32()))
} else {
xml.push_str(&format!("{}", dec_char_ref.get_as_char()))
}
}
Token::HexCharRef(hex_char_ref) => {
if self.keep_references {
xml.push_str(&format!("&#x{};", hex_char_ref.get_as_u32()))
} else {
xml.push_str(&format!("{}", hex_char_ref.get_as_char()))
}
}
}
i += 1;
}
xml
}
fn get_previous_token(tokens: &Vec<Token>, index: usize, keep_comments: bool) -> &Token {
let mut previous_index = index - 1;
while previous_index > 0 {
match &tokens[previous_index] {
Token::Comment(_comment) => {
if keep_comments {
return &tokens[previous_index];
} else {
previous_index -= 1;
}
}
_ => {
return &tokens[previous_index];
}
}
}
&tokens[0]
}
fn get_next_token(tokens: &Vec<Token>, index: usize, keep_comments: bool) -> &Token {
let mut next_index = index + 1;
while next_index < tokens.len() {
match &tokens[next_index] {
Token::Comment(_comment) => {
if keep_comments {
return &tokens[next_index];
} else {
next_index += 1;
}
}
_ => {
return &tokens[next_index];
}
}
}
&tokens[tokens.len() - 1]
}
fn allow_preceeding_whitespace(token: &Token) -> bool {
match token {
Token::ElementStart(_qname) => {
return XHTMLSerializer::is_inline_element_local_name(
_qname.get_local_part_as_str(),
);
}
Token::CDATASection(_cdata_section) => true,
Token::Comment(_comment) => true,
Token::EntityRef(_name) => true,
Token::DecCharRef(_dec_char_ref) => true,
Token::HexCharRef(_hex_char_ref) => true,
Token::PIStart => true,
_ => false,
}
}
fn allow_following_whitespace(token: &Token) -> bool {
match token {
Token::ElementEnd(_qname) => {
return XHTMLSerializer::is_inline_element_local_name(
_qname.get_local_part_as_str(),
);
}
Token::CDATASection(_cdata_section) => true,
Token::Comment(_comment) => true,
Token::EntityRef(_name) => true,
Token::DecCharRef(_dec_char_ref) => true,
Token::HexCharRef(_hex_char_ref) => true,
Token::PIEnd => true,
_ => false,
}
}
fn is_inline_element_local_name(local_name: &str) -> bool {
match local_name {
"a" => true,
"abbr" => true,
"b" => true,
"bdi" => true,
"cite" => true,
"code" => true,
"data" => true,
"dfn" => true,
"em" => true,
"i" => true,
"kbd" => true,
"mark" => true,
"q" => true,
"s" => true,
"samp" => true,
"small" => true,
"span" => true,
"strong" => true,
"sub" => true,
"sup" => true,
"time" => true,
"u" => true,
"var" => true,
_ => false,
}
}
}
#[cfg(test)]
mod tests {
    /// Placeholder sanity check; it does not exercise the serializer.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}