use std::borrow::Cow;
use std::io;
use crate::entity::{serialize_attribute, serialize_cdata, serialize_text};
use crate::error::Error;
use crate::id::NameId;
use crate::output::Normalizer;
use crate::xotdata::{Node, Xot};
use super::fullname::FullnameSerializer;
use super::html5elements::Html5Elements;
use super::{Output, OutputToken, Pretty};
/// Serializer that renders a stream of `Output` tokens as HTML5 text.
///
/// It borrows the document and the HTML5 lookup tables, and owns the
/// namespace-prefix bookkeeping that changes while elements are opened and
/// closed.
pub(crate) struct Html5Serializer<'a, N>
where
    N: Normalizer,
{
    /// Document store used to resolve nodes, names and namespaces.
    xot: &'a Xot,
    /// HTML5 element tables (void, no-escape, formatted and inline names).
    html5_elements: &'a Html5Elements,
    /// Elements whose text children are written through `serialize_cdata`.
    cdata_section_names: &'a [NameId],
    /// Tracks the prefix declarations in effect and renders qualified names.
    fullname_serializer: FullnameSerializer<'a>,
    /// Normalizer handed to every text and attribute serialization call.
    normalizer: N,
}
/// Reports whether `name_id` matches any entry of `names`.
///
/// An entry matches when it is the very same `NameId`. In addition, when both
/// the entry and `name_id` are in the HTML namespace (as decided by
/// `Html5Elements::is_html_namespace`), they match if their local names are
/// equal ignoring ASCII case.
///
/// Every entry of `names` is considered: an entry that is not an HTML name
/// only rules out the case-insensitive comparison for that entry, it does not
/// end the search.
fn html_matches_suppress(
    xot: &Xot,
    html5_elements: &Html5Elements,
    names: &[NameId],
    name_id: NameId,
) -> bool {
    // An exact match never needs a namespace check.
    if names.contains(&name_id) {
        return true;
    }
    // Case-insensitive matching is reserved for HTML names; if the candidate
    // is not one, no entry can match it any more.
    if !html5_elements.is_html_namespace(xot, xot.namespace_for_name(name_id)) {
        return false;
    }
    let local_name = xot.local_name_str(name_id);
    names.iter().any(|&suppress_name| {
        html5_elements.is_html_namespace(xot, xot.namespace_for_name(suppress_name))
            && xot
                .local_name_str(suppress_name)
                .eq_ignore_ascii_case(local_name)
    })
}
impl<'a, N: Normalizer> Html5Serializer<'a, N> {
    /// Creates a serializer for output that starts at `node`.
    ///
    /// The namespaces in scope at `node` are collected and handed to the
    /// `FullnameSerializer` as its initial set of declarations.
    pub(crate) fn new(
        xot: &'a Xot,
        html5_elements: &'a Html5Elements,
        node: Node,
        cdata_section_names: &'a [NameId],
        normalizer: N,
    ) -> Self {
        let extra_declarations = xot.namespaces_in_scope(node).collect();
        Self {
            xot,
            html5_elements,
            cdata_section_names,
            fullname_serializer: FullnameSerializer::new(xot, extra_declarations),
            normalizer,
        }
    }

    /// Writes every `(node, output)` pair to `w` without adding whitespace.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error reported by
    /// [`Self::serialize_node`].
    pub(crate) fn serialize<W: io::Write>(
        &mut self,
        w: &mut W,
        outputs: impl Iterator<Item = (Node, Output<'a>)>,
    ) -> Result<(), Error> {
        for (node, output) in outputs {
            self.serialize_node(w, node, output)?;
        }
        Ok(())
    }

    /// Writes every `(node, output)` pair to `w`, indented as decided by
    /// `Pretty`.
    ///
    /// `Pretty` is given two predicates: one that is true for names in the
    /// built-in `formatted_names` table or in `suppress` (see
    /// `html_matches_suppress`), and one that is true for inline elements.
    /// For each token it returns an indentation level, written as two spaces
    /// per level before the token, and whether a newline follows the token.
    ///
    /// # Errors
    ///
    /// Returns the first rendering or I/O error encountered.
    pub(crate) fn serialize_pretty<W: io::Write>(
        &mut self,
        w: &mut W,
        outputs: impl Iterator<Item = (Node, Output<'a>)>,
        suppress: &[NameId],
    ) -> Result<(), Error> {
        // Copy the shared references out of `self` so the predicates do not
        // keep `self` borrowed while `serialize_node` needs it mutably.
        let xot = self.xot;
        let html5_elements = self.html5_elements;
        let is_suppressed = |name_id| {
            html5_elements.formatted_names.matches(xot, name_id)
                || html_matches_suppress(xot, html5_elements, suppress, name_id)
        };
        let is_inline = |name_id| html5_elements.is_inline(xot, name_id);
        let mut pretty = Pretty::new(xot, is_suppressed, is_inline);
        for (node, output) in outputs {
            let (indentation, newline) = pretty.prettify(node, &output);
            if indentation > 0 {
                w.write_all(" ".repeat(indentation * 2).as_bytes())?;
            }
            self.serialize_node(w, node, output)?;
            if newline {
                w.write_all(b"\n")?;
            }
        }
        Ok(())
    }

    /// Renders a single token and writes it to `w`, preceded by one space
    /// when the rendered token asks for it.
    ///
    /// # Errors
    ///
    /// Returns rendering errors from [`Self::render_output`] and propagates
    /// I/O errors from the writer instead of panicking on them.
    pub(crate) fn serialize_node<W: io::Write>(
        &mut self,
        w: &mut W,
        node: Node,
        output: Output<'a>,
    ) -> Result<(), Error> {
        let data = self.render_output(node, &output)?;
        if data.space {
            w.write_all(b" ")?;
        }
        w.write_all(data.text.as_bytes())?;
        Ok(())
    }

    /// Turns one `Output` token for `node` into the text to emit.
    ///
    /// The returned `OutputToken` carries the text and a flag telling the
    /// caller whether a separating space must be written first (used for
    /// namespace declarations and attributes inside a start tag).
    ///
    /// # Errors
    ///
    /// * errors from the `FullnameSerializer` when a qualified name cannot be
    ///   produced;
    /// * `Error::NamespaceInProcessingInstruction` when a processing
    ///   instruction target has a namespace;
    /// * `Error::ProcessingInstructionGtInHtml` when processing instruction
    ///   data contains `>`, which would end the instruction early in HTML.
    ///
    /// # Panics
    ///
    /// Panics if a `Prefix` token is rendered for a node that is not an
    /// element, or a `Text` token for a node whose parent is missing or is not
    /// an element.
    pub(crate) fn render_output(
        &mut self,
        node: Node,
        output: &Output<'a>,
    ) -> Result<OutputToken, Error> {
        use Output::*;
        let token = match output {
            StartTagOpen(element) => {
                self.fullname_serializer
                    .push(self.xot.namespace_declarations(node));
                let namespace_id = self.xot.namespace_for_name(element.name_id);
                // Elements in a namespace that must not carry a prefix get a
                // default namespace declaration the first time they appear.
                let needs_default_declaration = self
                    .html5_elements
                    .must_be_serialized_unprefixed(namespace_id)
                    && !self.fullname_serializer.has_empty_prefix(namespace_id);
                let text = if needs_default_declaration {
                    self.fullname_serializer.add_empty_prefix(namespace_id);
                    format!(
                        "<{} xmlns=\"{}\"",
                        self.xot.local_name_str(element.name_id),
                        self.xot.namespace_str(namespace_id)
                    )
                } else {
                    format!(
                        "<{}",
                        self.fullname_serializer.element_fullname(element.name_id)?
                    )
                };
                OutputToken { space: false, text }
            }
            StartTagClose => OutputToken {
                space: false,
                text: ">".to_string(),
            },
            EndTag(element) => {
                // Void elements have no end tag at all.
                let text = if self
                    .html5_elements
                    .void_names
                    .matches(self.xot, element.name())
                {
                    String::new()
                } else {
                    format!(
                        "</{}>",
                        self.fullname_serializer.element_fullname(element.name_id)?
                    )
                };
                // Pop only after the name was rendered: the element's own
                // declarations are still needed to resolve its prefix.
                self.fullname_serializer
                    .pop(self.xot.has_namespace_declarations(node));
                OutputToken { space: false, text }
            }
            Prefix(prefix_id, namespace_id) => {
                let element_name = self.xot.element(node).unwrap().name();
                let is_default_prefix = *prefix_id == self.xot.empty_prefix();
                // A declaration is dropped when it
                //  * declares the `xml` namespace,
                //  * is a default declaration for a namespace other than the
                //    element's own, or
                //  * binds a prefix to a namespace that is serialized
                //    unprefixed while no attribute of the element uses it.
                let omit = *namespace_id == self.xot.xml_namespace()
                    || (is_default_prefix
                        && self.xot.namespace_for_name(element_name) != *namespace_id)
                    || (!is_default_prefix
                        && self
                            .html5_elements
                            .must_be_serialized_unprefixed(*namespace_id)
                        && !self
                            .xot
                            .attributes(node)
                            .keys()
                            .any(|name| self.xot.namespace_for_name(name) == *namespace_id));
                if omit {
                    OutputToken {
                        space: false,
                        text: String::new(),
                    }
                } else {
                    let namespace = self.xot.namespace_str(*namespace_id);
                    let text = if is_default_prefix {
                        format!("xmlns=\"{}\"", namespace)
                    } else {
                        format!(
                            "xmlns:{}=\"{}\"",
                            self.xot.prefix_str(*prefix_id),
                            namespace
                        )
                    };
                    OutputToken { space: true, text }
                }
            }
            Attribute(name_id, value) => {
                let fullname = self.fullname_serializer.attribute_fullname(*name_id)?;
                let namespace = self.xot.namespace_for_name(*name_id);
                // Boolean attribute minimisation: an unprefixed HTML attribute
                // whose value repeats its own name (ignoring ASCII case) is
                // written as the bare name.
                let is_minimized = self.html5_elements.is_html_namespace(self.xot, namespace)
                    && self
                        .fullname_serializer
                        .attribute_prefix(*name_id)?
                        .is_none()
                    && self
                        .xot
                        .local_name_str(*name_id)
                        .eq_ignore_ascii_case(value);
                let text = if is_minimized {
                    fullname.to_string()
                } else {
                    // Namespaced attributes keep XML escaping; attributes in
                    // no namespace use the HTML escaping rules.
                    let value = if namespace != self.xot.no_namespace() {
                        serialize_attribute((*value).into(), &self.normalizer)
                    } else {
                        serialize_attribute_html((*value).into(), &self.normalizer)
                    };
                    format!("{}=\"{}\"", fullname, value)
                };
                OutputToken { space: true, text }
            }
            Text(text) => {
                let parent = self.xot.parent(node).unwrap();
                let parent_name = self.xot.element(parent).unwrap().name();
                // The parent element decides how the text is escaped; the
                // checks are ordered from most to least specific.
                let value = if self
                    .html5_elements
                    .no_escape_names
                    .matches(self.xot, parent_name)
                {
                    serialize_text_no_escape((*text).into(), &self.normalizer).to_string()
                } else if self.cdata_section_names.contains(&parent_name) {
                    serialize_cdata((*text).into(), &self.normalizer).to_string()
                } else if self.html5_elements.is_html_element(self.xot, parent_name) {
                    serialize_text_html((*text).into(), &self.normalizer).to_string()
                } else {
                    serialize_text((*text).into(), &self.normalizer, false).to_string()
                };
                OutputToken {
                    space: false,
                    text: value,
                }
            }
            Comment(text) => OutputToken {
                space: false,
                text: format!("<!--{}-->", text),
            },
            ProcessingInstruction(target, data) => {
                let (target, ns) = self.xot.name_ns_str(*target);
                if !ns.is_empty() {
                    return Err(Error::NamespaceInProcessingInstruction);
                }
                // HTML processing instructions end at the first `>`, so the
                // data must not contain one.
                let text = match data {
                    Some(data) if data.contains('>') => {
                        return Err(Error::ProcessingInstructionGtInHtml(data.to_string()));
                    }
                    Some(data) => format!("<?{} {}>", target, data),
                    None => format!("<?{}>", target),
                };
                OutputToken { space: false, text }
            }
        };
        Ok(token)
    }
}
/// Escapes text content for an HTML element.
///
/// The content is first passed through `normalizer`. Afterwards `&` becomes
/// `&amp;`, `<` becomes `&lt;` and U+00A0 (no-break space) becomes `&nbsp;`.
/// When none of these characters occur the normalized content is returned
/// as-is, without building a new string.
pub(crate) fn serialize_text_html<'a, N: Normalizer>(
    content: Cow<'a, str>,
    normalizer: &N,
) -> Cow<'a, str> {
    let normalized_content = normalizer.normalize(content);
    // Fast path: nothing to escape, hand back the normalized text untouched.
    if !normalized_content.contains(|c: char| matches!(c, '&' | '<' | '\u{a0}')) {
        return normalized_content;
    }
    let mut result = String::with_capacity(normalized_content.len());
    for c in normalized_content.chars() {
        match c {
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '\u{a0}' => result.push_str("&nbsp;"),
            _ => result.push(c),
        }
    }
    result.into()
}
pub(crate) fn serialize_attribute_html<'a, N: Normalizer>(
content: Cow<'a, str>,
normalizer: &N,
) -> Cow<'a, str> {
let mut result = String::new();
let mut change = false;
let normalized_content = normalizer.normalize(content);
for c in normalized_content.chars() {
match c {
'&' => {
change = true;
result.push_str("&")
}
'\'' => {
change = true;
result.push_str("'")
}
'"' => {
change = true;
result.push_str(""")
}
'\u{a0}' => {
change = true;
result.push_str(" ")
}
_ => result.push(c),
}
}
if !change {
normalized_content
} else {
result.into()
}
}
/// Prepares text for elements whose content must not be escaped.
///
/// The only processing applied is the normalizer; no character is replaced
/// by an entity reference.
pub(crate) fn serialize_text_no_escape<'a, N: Normalizer>(
    content: Cow<'a, str>,
    normalizer: &N,
) -> Cow<'a, str> {
    N::normalize(normalizer, content)
}
#[cfg(test)]
mod tests {
    //! End-to-end tests: parse an XML document, serialize it as HTML5 and
    //! compare the complete output.
    //!
    //! Expected pretty-printed output is indented with two spaces per nesting
    //! level, the unit `serialize_pretty` writes.

    use crate::output::{html5::Parameters, html5elements::XHTML_NS, Indentation};
    use super::*;

    // Empty HTML elements are written with explicit start and end tags.
    #[test]
    fn test_never_empty_html() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><head></head><body></body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(s, "<!DOCTYPE html><html><head></head><body></body></html>");
    }

    // Empty elements of a foreign namespace are not self-closed either.
    #[test]
    fn test_never_empty_xml_element() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><head><foo xmlns="foo"><bar></bar></foo></head><body></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><head><foo xmlns="foo"><bar></bar></foo></head><body></body></html>"#
        );
    }

    // Void elements get a start tag only.
    #[test]
    fn test_void_element() {
        let mut xot = Xot::new();
        let root = xot.parse("<html><body>foo<br/>bar</body></html>").unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(s, "<!DOCTYPE html><html><body>foo<br>bar</body></html>");
    }

    // `&` in ordinary element content is escaped on output.
    #[test]
    fn test_escaping_for_normal_content() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><head><title>foo &amp; bar</title></head><body>foo &amp; bar</body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><head><title>foo &amp; bar</title></head><body>foo &amp; bar</body></html>"#
        );
    }

    // Text inside `script` and `style` is written verbatim.
    #[test]
    fn test_no_escaping_for_script_and_style() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><head><script>if (a &lt; b) foo()</script><style>a &lt; b</style></head><body></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><head><script>if (a < b) foo()</script><style>a < b</style></head><body></body></html>"#
        );
    }

    // Processing instructions end with `>` rather than `?>`.
    #[test]
    fn test_processing_instruction() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><head><?foo bar?></head><body></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><head><?foo bar></head><body></body></html>"#
        );
    }

    // A `>` inside processing instruction data is rejected.
    #[test]
    fn test_processing_instruction_no_gt() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><head><?foo >bar?></head><body></body></html>"#)
            .unwrap();
        let e = xot.html5().to_string(root).unwrap_err();
        assert!(matches!(e, Error::ProcessingInstructionGtInHtml(_)));
        match e {
            Error::ProcessingInstructionGtInHtml(s) => {
                assert_eq!(s, ">bar");
            }
            _ => unreachable!(),
        }
    }

    // A no-break space in HTML text becomes `&nbsp;`.
    #[test]
    fn test_serialize_text_nbsp() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><body>foo\u{00a0}bar</body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(s, "<!DOCTYPE html><html><body>foo&nbsp;bar</body></html>");
    }

    // Text inside a foreign-namespace element keeps the raw no-break space.
    #[test]
    fn test_serialize_text_no_nbsp_for_xml_island() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><body><island xmlns=\"island\">\u{00a0}</island></body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            "<!DOCTYPE html><html><body><island xmlns=\"island\">\u{00a0}</island></body></html>"
        );
    }

    // A no-break space in an unprefixed attribute becomes `&nbsp;`.
    #[test]
    fn test_serialize_attribute_nbsp() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><body foo='\u{00a0}'>bar</body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body foo="&nbsp;">bar</body></html>"#
        );
    }

    // Namespaced attributes keep the raw no-break space.
    #[test]
    fn test_serialize_attribute_nbsp_not_in_prefixed_attribute() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><body xmlns:prefix='ns' prefix:foo='\u{00a0}'>bar</body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            "<!DOCTYPE html><html><body xmlns:prefix=\"ns\" prefix:foo=\"\u{00a0}\">bar</body></html>".to_string()
        );
    }

    // `<` is not escaped inside HTML attribute values.
    #[test]
    fn test_serialize_attribute_dont_escape_lt() {
        let mut xot = Xot::new();
        let root = xot
            .parse("<html><body foo='&lt;'>bar</body></html>")
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(s, r#"<!DOCTYPE html><html><body foo="<">bar</body></html>"#);
    }

    // An attribute whose value repeats its name is minimised.
    #[test]
    fn test_serialize_attribute_boolean() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><option selected="selected"/></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body><option selected></option></body></html>"#
        );
    }

    // A prefixed attribute is never minimised.
    #[test]
    fn test_serialize_attribute_boolean_not_when_prefixed() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><option xmlns:prefix="ns" prefix:selected="prefix:selected"/></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body><option xmlns:prefix="ns" prefix:selected="prefix:selected"></option></body></html>"#
        );
    }

    // Matching the local name is not enough when the attribute is prefixed.
    #[test]
    fn test_serialize_attribute_boolean_with_prefix() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><option xmlns:foo="foo" foo:selected="selected"/></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body><option xmlns:foo="foo" foo:selected="selected"></option></body></html>"#
        );
    }

    // Not even when the prefix is bound to the XHTML namespace.
    #[test]
    fn test_serialize_attribute_boolean_with_xhtml_prefix() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><option xmlns:foo="https://www.w3.org/1999/xhtml" foo:selected="selected"/></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body><option xmlns:foo="https://www.w3.org/1999/xhtml" foo:selected="selected"></option></body></html>"#
        );
    }

    // The name/value comparison ignores ASCII case.
    #[test]
    fn test_serialize_attribute_boolean_case_insensitive() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><option selected="SeLecTed"/></body></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html><body><option selected></option></body></html>"#
        );
    }

    // A declaration of the `xml` namespace is dropped.
    #[test]
    fn test_html_no_xml_namespace() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html xmlns:xml="http://www.w3.org/XML/1998/namespace"></html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(s, "<!DOCTYPE html><html></html>");
    }

    // Prefixed XHTML elements are written unprefixed with a default
    // namespace declaration.
    #[test]
    fn test_xhtml_namespace_without_prefix() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<prefix:html xmlns:prefix="https://www.w3.org/1999/xhtml"></prefix:html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html xmlns="https://www.w3.org/1999/xhtml"></html>"#
        );
    }

    // The prefix declaration is kept when an attribute still needs it.
    #[test]
    fn test_xhtml_namespace_without_prefix_but_with_attribute() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<prefix:html xmlns:prefix="https://www.w3.org/1999/xhtml" prefix:a="A"></prefix:html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html xmlns="https://www.w3.org/1999/xhtml" xmlns:prefix="https://www.w3.org/1999/xhtml" prefix:a="A"></html>"#
        );
    }

    // The default declaration is emitted once, not on every descendant.
    #[test]
    fn test_xhtml_namespace_without_prefix_dont_redeclare() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<prefix:html xmlns:prefix="https://www.w3.org/1999/xhtml"><prefix:body></prefix:body></prefix:html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html xmlns="https://www.w3.org/1999/xhtml"><body></body></html>"#
        );
    }

    // A default namespace that is not the element's own is dropped (XHTML).
    #[test]
    fn test_default_namespace_different_from_element_is_ignored_xhtml() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<prefix:html xmlns="different" xmlns:prefix="https://www.w3.org/1999/xhtml"><prefix:body></prefix:body></prefix:html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html xmlns="https://www.w3.org/1999/xhtml"><body></body></html>"#
        );
    }

    // A default namespace that is not the element's own is dropped (other).
    #[test]
    fn test_default_namespace_different_from_element_is_ignored() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<prefix:html xmlns="different" xmlns:prefix="main"><prefix:body></prefix:body></prefix:html>"#)
            .unwrap();
        let s = xot.html5().to_string(root).unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><prefix:html xmlns:prefix="main"><prefix:body></prefix:body></prefix:html>"#
        );
    }

    // Foreign-namespace content is indented like block content.
    #[test]
    fn test_pretty_with_xml_island() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><island xmlns="island"><foo><bar/></foo></island></body></html>"#)
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Default::default()),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body>
    <island xmlns="island">
      <foo>
        <bar></bar>
      </foo>
    </island>
  </body>
</html>
"#
        );
    }

    // Block-level elements each go on their own line.
    #[test]
    fn test_pretty_with_non_phrasing_element() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><p>Hello</p><p>World</p></body></html>"#)
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Default::default()),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body>
    <p>Hello</p>
    <p>World</p>
  </body>
</html>
"#
        );
    }

    // Inline (phrasing) elements stay on the line of their parent.
    #[test]
    fn test_pretty_with_phrasing_element() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><p><span>Foo</span></p></body></html>"#)
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Default::default()),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body>
    <p><span>Foo</span></p>
  </body>
</html>
"#
        );
    }

    // Content of formatted elements such as `pre` is never re-indented.
    #[test]
    fn test_pretty_with_formatted_element() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><pre><p></p></pre></body></html>"#)
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Default::default()),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body>
    <pre><p></p></pre>
  </body>
</html>
"#
        );
    }

    // An explicitly suppressed element keeps its content on one line.
    #[test]
    fn test_pretty_with_suppressed_element_exact_match() {
        let mut xot = Xot::new();
        let body = xot.add_name("body");
        let root = xot.parse(r#"<html><body><p></p></body></html>"#).unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Indentation {
                        suppress: vec![body],
                    }),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body><p></p></body>
</html>
"#
        );
    }

    // Suppression matches HTML names regardless of ASCII case.
    #[test]
    fn test_pretty_with_suppressed_element_case_insensitive_match() {
        let mut xot = Xot::new();
        let body = xot.add_name("body");
        let root = xot.parse(r#"<html><BODY><p></p></BODY></html>"#).unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Indentation {
                        suppress: vec![body],
                    }),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <BODY><p></p></BODY>
</html>
"#
        );
    }

    // Names outside the HTML namespace are only matched exactly.
    #[test]
    fn test_pretty_with_suppressed_element_case_no_case_insensitive_match_for_non_xhtml_namespace()
    {
        let mut xot = Xot::new();
        let foo = xot.add_name("foo");
        let root = xot
            .parse(
                r#"<html><body><prefix:FOO xmlns:prefix="ns"><p></p></prefix:FOO></body></html>"#,
            )
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Indentation {
                        suppress: vec![foo],
                    }),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body>
    <prefix:FOO xmlns:prefix="ns">
      <p></p>
    </prefix:FOO>
  </body>
</html>
"#
        );
    }

    // An unknown HTML element is laid out like an inline element.
    #[test]
    fn test_pretty_with_unknown_xhtml_element_treated_as_span() {
        let mut xot = Xot::new();
        let root = xot
            .parse(&format!(
                r#"<html><body><FOO xmlns:xhtml="{}"><p></p></FOO></body></html>"#,
                XHTML_NS
            ))
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Default::default()),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body><FOO><p></p></FOO></body>
</html>
"#
        );
    }

    // A suppress name in the XHTML namespace matches an element in no
    // namespace.
    #[test]
    fn test_pretty_with_suppressed_element_case_insensitive_match_no_ns_xhtml() {
        let mut xot = Xot::new();
        let xhtml_ns = xot.add_namespace(XHTML_NS);
        let body = xot.add_name_ns("body", xhtml_ns);
        let root = xot.parse(r#"<html><body><p></p></body></html>"#).unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Indentation {
                        suppress: vec![body],
                    }),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body><p></p></body>
</html>
"#
        );
    }

    // An unrecognised element in no namespace is laid out inline as well.
    #[test]
    fn test_unrecognized_html_element_is_inline() {
        let mut xot = Xot::new();
        let root = xot
            .parse(r#"<html><body><FOO><p></p></FOO></body></html>"#)
            .unwrap();
        let s = xot
            .html5()
            .serialize_string(
                Parameters {
                    indentation: Some(Indentation { suppress: vec![] }),
                    ..Default::default()
                },
                root,
            )
            .unwrap();
        assert_eq!(
            s,
            r#"<!DOCTYPE html><html>
  <body><FOO><p></p></FOO></body>
</html>
"#
        );
    }
}