use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use roxmltree::{Document, Node, NodeType};
use super::C14nError;
use super::escape::{escape_attr, escape_cr, escape_text};
use super::prefix::{attribute_prefix, element_prefix};
use super::xml_base::{compute_effective_xml_base, resolve_uri};
/// Namespace URI of the reserved `xml` prefix. Attributes are compared against
/// it to recognise `xml:*` attributes, and inherited attributes are emitted in it.
const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
/// Decides whether an attribute from the `xml` namespace may be copied from an
/// omitted ancestor onto an element of a document subset.
///
/// `local_name` is the attribute name without its prefix. `lang`, `space` and
/// `base` always qualify; `id` qualifies only when `include_xml_id` is set.
/// Every other name is rejected.
fn is_inheritable_xml_attr(local_name: &str, include_xml_id: bool) -> bool {
    match local_name {
        "lang" | "space" | "base" => true,
        "id" => include_xml_id,
        _ => false,
    }
}
/// Switches that select which canonicalization behaviours `serialize_element`
/// applies. The struct is `Copy` and is passed by value through the recursion.
#[derive(Clone, Copy)]
pub(crate) struct C14nConfig {
/// When `true`, an element whose parent element is not in the node set gets
/// inheritable `xml:*` attributes copied from its omitted ancestors.
pub inherit_xml_attrs: bool,
/// When `true`, `xml:base` values on an element whose parent element is not in
/// the node set are rewritten against the effective base of that parent.
pub fixup_xml_base: bool,
}
/// Strategy that decides which namespace declarations an element's start tag
/// carries. The serializer calls it once per rendered element.
pub(crate) trait NsRenderer {
/// Computes the namespace declarations for `node`.
///
/// `parent_rendered` is the map returned for the nearest rendered ancestor
/// (empty at the top of the document).
///
/// Returns a pair:
/// * the `(prefix, uri)` declarations to write on this start tag, in the order
///   they should appear; an empty prefix is written as `xmlns="..."`;
/// * the map handed down as `parent_rendered` when serializing this element's
///   children (presumably prefix -> URI of what is in effect; confirm against
///   the implementations).
fn render_namespaces<'a>(
&self,
node: Node<'a, '_>,
parent_rendered: &HashMap<String, String>,
) -> (Vec<(String, String)>, HashMap<String, String>);
}
/// Serializes `doc` (or the subset selected by `node_set`) in canonical form,
/// appending the bytes to `output`.
///
/// * `node_set` - `None` renders the whole document; otherwise only nodes for
///   which the predicate returns `true` are rendered.
/// * `with_comments` - whether comment nodes are written.
/// * `ns_renderer` - decides the namespace declarations of each element.
/// * `config` - xml:* inheritance / xml:base fix-up switches.
///
/// `output` is appended to, never cleared. Document-level line breaks are
/// decided partly by whether `output` is empty, so callers normally pass an
/// empty buffer.
///
/// Currently always returns `Ok(())`.
pub(crate) fn serialize_canonical(
    doc: &Document,
    node_set: Option<&dyn Fn(Node) -> bool>,
    with_comments: bool,
    ns_renderer: &dyn NsRenderer,
    config: C14nConfig,
    output: &mut Vec<u8>,
) -> Result<(), C14nError> {
    // Above the document root no namespace has been rendered yet.
    let nothing_rendered: HashMap<String, String> = HashMap::new();
    serialize_children(
        doc.root(),
        node_set,
        with_comments,
        ns_renderer,
        config,
        &nothing_rendered,
        output,
    );
    Ok(())
}
/// Serializes every child of `parent` in document order.
///
/// Elements in the node set are written through `serialize_element`. Elements
/// outside it are skipped, but their descendants are still visited with the
/// same `parent_rendered` map. Text is escaped with `escape_text`; comment and
/// PI content with `escape_cr`. Comments are written only when `with_comments`
/// is set.
///
/// When `parent` is the document root, text children are ignored and each
/// element, comment or PI is separated from earlier output by a line feed.
fn serialize_children(
    parent: Node,
    node_set: Option<&dyn Fn(Node) -> bool>,
    with_comments: bool,
    ns_renderer: &dyn NsRenderer,
    config: C14nConfig,
    parent_rendered: &HashMap<String, String>,
    output: &mut Vec<u8>,
) {
    let at_document_level = matches!(parent.node_type(), NodeType::Root);

    for child in parent.children() {
        // No predicate means "everything is selected".
        let selected = match node_set {
            Some(pred) => pred(child),
            None => true,
        };

        match child.node_type() {
            NodeType::Element if selected => {
                if at_document_level && !output.is_empty() {
                    output.push(b'\n');
                }
                serialize_element(
                    child,
                    node_set,
                    with_comments,
                    ns_renderer,
                    config,
                    parent_rendered,
                    output,
                );
            }
            NodeType::Element => {
                // The element itself is omitted, but selected descendants must
                // still be emitted.
                serialize_children(
                    child,
                    node_set,
                    with_comments,
                    ns_renderer,
                    config,
                    parent_rendered,
                    output,
                );
            }
            NodeType::Text => {
                if selected && !at_document_level {
                    if let Some(text) = child.text() {
                        escape_text(text, output);
                    }
                }
            }
            NodeType::Comment => {
                if !(with_comments && selected) {
                    continue;
                }
                if at_document_level {
                    write_doc_level_separator(&child, output);
                }
                output.extend_from_slice(b"<!--");
                if let Some(body) = child.text() {
                    escape_cr(body, output);
                }
                output.extend_from_slice(b"-->");
            }
            NodeType::PI => {
                if !selected {
                    continue;
                }
                let Some(pi) = child.pi() else {
                    continue;
                };
                if at_document_level {
                    write_doc_level_separator(&child, output);
                }
                output.extend_from_slice(b"<?");
                output.extend_from_slice(pi.target.as_bytes());
                // The separating space is written only when the PI has a value.
                if let Some(value) = pi.value {
                    output.push(b' ');
                    escape_cr(value, output);
                }
                output.extend_from_slice(b"?>");
            }
            NodeType::Root => {}
        }
    }
}
/// Writes one element in canonical form: start tag, namespace declarations,
/// attributes, children and an explicit end tag (never a self-closing tag).
///
/// * `node` - the element to render; callers only pass nodes in the node set.
/// * `node_set` - subset predicate, `None` for the whole document.
/// * `with_comments` - forwarded to the children.
/// * `ns_renderer` - supplies the namespace declarations for `node`.
/// * `config` - controls xml:* inheritance and xml:base fix-up.
/// * `parent_rendered` - namespace map of the nearest rendered ancestor.
/// * `output` - buffer the bytes are appended to.
///
/// Attribute handling for document subsets:
/// * with `config.inherit_xml_attrs`, inheritable `xml:*` attributes of omitted
///   ancestors are added to the element's own attributes;
/// * with `config.fixup_xml_base`, and only when the parent element is not in
///   the node set, `xml:base` values are resolved against the effective base of
///   that parent;
/// * `xml:id` is inherited only when xml:base fix-up is off. Fix-up is the
///   Canonical XML 1.1 behaviour and 1.1 does not propagate `xml:id`, whereas
///   Canonical XML 1.0 inherits every `xml:*` attribute.
fn serialize_element(
    node: Node,
    node_set: Option<&dyn Fn(Node) -> bool>,
    with_comments: bool,
    ns_renderer: &dyn NsRenderer,
    config: C14nConfig,
    parent_rendered: &HashMap<String, String>,
    output: &mut Vec<u8>,
) {
    let (ns_decls, rendered) = ns_renderer.render_namespaces(node, parent_rendered);

    output.push(b'<');
    write_qualified_name(node, output);

    // Namespace declarations precede ordinary attributes and are written in
    // the order the renderer returned them.
    for (prefix, uri) in &ns_decls {
        if prefix.is_empty() {
            output.extend_from_slice(b" xmlns=\"");
        } else {
            output.extend_from_slice(b" xmlns:");
            output.extend_from_slice(prefix.as_bytes());
            output.extend_from_slice(b"=\"");
        }
        escape_attr(uri, output);
        output.push(b'"');
    }

    let inherited_xml = if config.inherit_xml_attrs {
        // Canonical XML 1.0 (no xml:base fix-up) inherits xml:id like any other
        // xml:* attribute; Canonical XML 1.1 (fix-up enabled) must not.
        let include_xml_id = !config.fixup_xml_base;
        collect_inherited_xml_attrs(node, node_set, include_xml_id)
    } else {
        Vec::new()
    };

    // True only for subset serialization where the parent is not a rendered
    // element; that is the only case where xml:base needs rewriting.
    let parent_not_in_set = if let Some(pred) = node_set {
        !node.parent().is_some_and(|p| p.is_element() && pred(p))
    } else {
        false
    };
    let effective_parent_base = if config.fixup_xml_base && parent_not_in_set {
        node.parent()
            .and_then(|p| compute_effective_xml_base(p, node_set))
    } else {
        None
    };

    // Tuple layout: (namespace URI, local name, prefix, value).
    let mut all_attrs: Vec<(&str, &str, &str, Cow<'_, str>)> = Vec::new();
    for attr in node.attributes() {
        let value = if let Some(base) = effective_parent_base.as_deref() {
            if attr.namespace() == Some(XML_NS) && attr.name() == "base" {
                let raw = attr.value();
                if raw.is_empty() {
                    // An empty xml:base is written back unchanged.
                    Cow::Borrowed(raw)
                } else {
                    Cow::Owned(resolve_uri(base, raw))
                }
            } else {
                Cow::Borrowed(attr.value())
            }
        } else {
            Cow::Borrowed(attr.value())
        };
        all_attrs.push((
            attr.namespace().unwrap_or(""),
            attr.name(),
            attribute_prefix(node, &attr),
            value,
        ));
    }

    for &(name, value) in &inherited_xml {
        // An inherited xml:base is replaced by the parent's effective base
        // when fix-up produced one.
        let resolved_value = if config.fixup_xml_base && name == "base" {
            match effective_parent_base.as_ref() {
                Some(base) => Cow::Owned(base.clone()),
                None => Cow::Borrowed(value),
            }
        } else {
            Cow::Borrowed(value)
        };
        all_attrs.push((XML_NS, name, "xml", resolved_value));
    }

    // Canonical attribute order: namespace URI first, then local name.
    all_attrs.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));

    for (_, local_name, prefix, value) in &all_attrs {
        output.push(b' ');
        if !prefix.is_empty() {
            output.extend_from_slice(prefix.as_bytes());
            output.push(b':');
        }
        output.extend_from_slice(local_name.as_bytes());
        output.extend_from_slice(b"=\"");
        escape_attr(value, output);
        output.push(b'"');
    }
    output.push(b'>');

    serialize_children(
        node,
        node_set,
        with_comments,
        ns_renderer,
        config,
        &rendered,
        output,
    );

    output.extend_from_slice(b"</");
    write_qualified_name(node, output);
    output.push(b'>');
}
/// Emits the line feed that separates a document-level comment or PI from
/// what came before it.
///
/// A `\n` is pushed when an element sibling precedes `node`, or, failing that,
/// when `output` already contains something. Otherwise nothing is written.
fn write_doc_level_separator(node: &Node, output: &mut Vec<u8>) {
    let needs_line_feed = has_preceding_element_sibling(node) || !output.is_empty();
    if needs_line_feed {
        output.push(b'\n');
    }
}
fn has_preceding_element_sibling(node: &Node) -> bool {
let mut prev = node.prev_sibling();
while let Some(p) = prev {
if p.is_element() {
return true;
}
prev = p.prev_sibling();
}
false
}
fn collect_inherited_xml_attrs<'a>(
node: Node<'a, '_>,
node_set: Option<&dyn Fn(Node) -> bool>,
include_xml_id: bool,
) -> Vec<(&'a str, &'a str)> {
let pred = match node_set {
Some(p) => p,
None => return Vec::new(), };
if let Some(parent) = node.parent()
&& parent.is_element()
&& pred(parent)
{
return Vec::new();
}
let mut seen: HashSet<&str> = HashSet::new();
for attr in node.attributes() {
if attr.namespace() == Some(XML_NS) {
let local = attr.name();
if is_inheritable_xml_attr(local, include_xml_id) {
seen.insert(local);
}
}
}
let mut inherited = Vec::new();
let mut ancestor = node.parent();
while let Some(anc) = ancestor {
if anc.is_element() {
if pred(anc) {
break;
}
for attr in anc.attributes() {
if attr.namespace() == Some(XML_NS) {
let local = attr.name();
if local == "base" && attr.value().is_empty() {
continue;
}
if is_inheritable_xml_attr(local, include_xml_id) && seen.insert(local) {
inherited.push((attr.name(), attr.value()));
}
}
}
}
ancestor = anc.parent();
}
inherited
}
/// Appends the element's qualified name to `output`: `prefix:local` when
/// `element_prefix` yields a non-empty prefix, otherwise just the local name.
fn write_qualified_name(node: Node, output: &mut Vec<u8>) {
    let prefix = element_prefix(node);
    let local = node.tag_name().name();
    if prefix.is_empty() {
        output.extend_from_slice(local.as_bytes());
        return;
    }
    output.extend_from_slice(prefix.as_bytes());
    output.push(b':');
    output.extend_from_slice(local.as_bytes());
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::super::ns_inclusive::InclusiveNsRenderer;
    use super::*;
    use roxmltree::NodeId;

    /// Configuration shared by every test here: xml:* inheritance enabled,
    /// xml:base fix-up disabled.
    const TEST_CONFIG: C14nConfig = C14nConfig {
        inherit_xml_attrs: true,
        fixup_xml_base: false,
    };

    /// Runs `serialize_canonical` with the inclusive namespace renderer and
    /// returns the output as a `String`.
    fn canonicalize(
        doc: &Document,
        node_set: Option<&dyn Fn(Node) -> bool>,
        with_comments: bool,
    ) -> String {
        let renderer = InclusiveNsRenderer;
        let mut out = Vec::new();
        serialize_canonical(
            doc,
            node_set,
            with_comments,
            &renderer,
            TEST_CONFIG,
            &mut out,
        )
        .expect("c14n");
        String::from_utf8(out).expect("utf8")
    }

    /// Parses `xml` and canonicalizes the whole document.
    fn canonicalize_str(xml: &str, with_comments: bool) -> String {
        let doc = Document::parse(xml).expect("parse");
        canonicalize(&doc, None, with_comments)
    }

    /// Builds a node-set predicate that accepts exactly the given node ids.
    fn subset_predicate(ids: HashSet<NodeId>) -> impl Fn(Node) -> bool {
        move |n: Node| ids.contains(&n.id())
    }

    /// Collects the ids of `node` and all of its descendants.
    fn subtree_ids(node: Node) -> HashSet<NodeId> {
        let mut ids = HashSet::new();
        let mut stack = vec![node];
        while let Some(n) = stack.pop() {
            ids.insert(n.id());
            stack.extend(n.children());
        }
        ids
    }

    #[test]
    fn empty_element_expanded() {
        assert_eq!(
            canonicalize_str("<root><empty/></root>", false),
            "<root><empty></empty></root>"
        );
    }

    #[test]
    fn text_preserved() {
        // A literal ampersand must be written as `&amp;` in the input (a bare
        // `&` is not well-formed XML) and is escaped again on output.
        assert_eq!(
            canonicalize_str("<root> hello &amp; world </root>", false),
            "<root> hello &amp; world </root>"
        );
    }

    #[test]
    fn comments_stripped_by_default() {
        assert_eq!(
            canonicalize_str("<root><!-- comment -->text</root>", false),
            "<root>text</root>"
        );
    }

    #[test]
    fn comments_preserved_with_flag() {
        assert_eq!(
            canonicalize_str("<root><!-- comment -->text</root>", true),
            "<root><!-- comment -->text</root>"
        );
    }

    #[test]
    fn attribute_sorting() {
        assert_eq!(
            canonicalize_str(r#"<root b="2" a="1" c="3"></root>"#, false),
            r#"<root a="1" b="2" c="3"></root>"#
        );
    }

    #[test]
    fn pi_serialization() {
        assert_eq!(
            canonicalize_str(
                "<?xml version=\"1.0\"?><root><?target data?></root>",
                false
            ),
            "<root><?target data?></root>"
        );
    }

    #[test]
    fn nested_elements_document_order() {
        assert_eq!(
            canonicalize_str("<a><b><c></c></b><d></d></a>", false),
            "<a><b><c></c></b><d></d></a>"
        );
    }

    #[test]
    fn document_level_comments() {
        assert_eq!(
            canonicalize_str("<!-- before --><root></root><!-- after -->", true),
            "<!-- before -->\n<root></root>\n<!-- after -->"
        );
    }

    #[test]
    fn document_level_pi_before_root() {
        assert_eq!(
            canonicalize_str("<?pi data?><root></root>", false),
            "<?pi data?>\n<root></root>"
        );
    }

    #[test]
    fn xml_lang_inherited_in_subset() {
        let xml = r#"<root xml:lang="en"><child>text</child></root>"#;
        let doc = Document::parse(xml).unwrap();
        let child = doc.root_element().first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(child));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"xml:lang="en""#),
            "xml:lang should be inherited from root; got: {result}"
        );
        assert!(
            !result.contains("<root"),
            "root should not appear in output"
        );
    }

    #[test]
    fn xml_space_inherited_in_subset() {
        let xml = r#"<root xml:space="preserve"><child>text</child></root>"#;
        let doc = Document::parse(xml).unwrap();
        let child = doc.root_element().first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(child));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"xml:space="preserve""#),
            "xml:space should be inherited; got: {result}"
        );
    }

    #[test]
    fn multiple_xml_attrs_inherited() {
        let xml = r#"<root xml:lang="fr" xml:space="preserve"><child/></root>"#;
        let doc = Document::parse(xml).unwrap();
        let child = doc.root_element().first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(child));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(result.contains(r#"xml:lang="fr""#), "got: {result}");
        assert!(result.contains(r#"xml:space="preserve""#), "got: {result}");
        let lang_pos = result.find("xml:lang").unwrap();
        let space_pos = result.find("xml:space").unwrap();
        assert!(
            lang_pos < space_pos,
            "xml:lang should sort before xml:space"
        );
    }

    #[test]
    fn own_xml_attr_takes_precedence() {
        let xml = r#"<root xml:lang="en"><child xml:lang="de">text</child></root>"#;
        let doc = Document::parse(xml).unwrap();
        let child = doc.root_element().first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(child));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"xml:lang="de""#),
            "child's own xml:lang should be used; got: {result}"
        );
        assert!(
            !result.contains(r#"xml:lang="en""#),
            "ancestor's xml:lang should not appear; got: {result}"
        );
    }

    #[test]
    fn closer_ancestor_xml_attr_wins() {
        let xml = r#"<a xml:lang="en"><b xml:lang="fr"><c>text</c></b></a>"#;
        let doc = Document::parse(xml).unwrap();
        let a = doc.root_element();
        let b = a.first_element_child().unwrap();
        let c = b.first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(c));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"xml:lang="fr""#),
            "closer ancestor's xml:lang='fr' should win; got: {result}"
        );
        assert!(
            !result.contains(r#"xml:lang="en""#),
            "distant ancestor's xml:lang='en' should not appear; got: {result}"
        );
    }

    #[test]
    fn no_inheritance_when_parent_in_set() {
        let xml = r#"<root xml:lang="en"><child>text</child></root>"#;
        let doc = Document::parse(xml).unwrap();
        let root = doc.root_element();
        let child = root.first_element_child().unwrap();
        let mut ids = subtree_ids(root);
        ids.extend(child.children().map(|c| c.id()));
        let pred = subset_predicate(ids);
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.starts_with(r#"<root xml:lang="en">"#),
            "got: {result}"
        );
        assert!(
            result.contains("<child>text</child>"),
            "child should not have xml:lang; got: {result}"
        );
    }

    #[test]
    fn no_inheritance_past_included_ancestor() {
        let xml = r#"<a xml:lang="en"><b><c>text</c></b></a>"#;
        let doc = Document::parse(xml).unwrap();
        let a = doc.root_element();
        let b = a.first_element_child().unwrap();
        let c = b.first_element_child().unwrap();
        // `a` and `c` (with c's children) are selected; `b` in between is not.
        let mut ids = HashSet::new();
        ids.insert(a.id());
        ids.insert(c.id());
        ids.extend(c.children().map(|child| child.id()));
        let pred = subset_predicate(ids);
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"<a xml:lang="en">"#),
            "a should have xml:lang; got: {result}"
        );
        assert!(
            !result.contains(r#"<c xml:lang"#),
            "c should NOT inherit xml:lang from a; got: {result}"
        );
    }

    #[test]
    fn no_inheritance_in_full_document() {
        let xml = r#"<root xml:lang="en"><child>text</child></root>"#;
        assert_eq!(
            canonicalize_str(xml, false),
            r#"<root xml:lang="en"><child>text</child></root>"#
        );
    }

    #[test]
    fn xml_attrs_inherited_with_namespaces() {
        let xml = r#"<foo:Root xmlns:foo="http://foo" xml:lang="en-ie"><foo:Child>data</foo:Child></foo:Root>"#;
        let doc = Document::parse(xml).unwrap();
        let child = doc.root_element().first_element_child().unwrap();
        let pred = subset_predicate(subtree_ids(child));
        let result = canonicalize(&doc, Some(&pred), false);
        assert!(
            result.contains(r#"xmlns:foo="http://foo""#),
            "got: {result}"
        );
        assert!(result.contains(r#"xml:lang="en-ie""#), "got: {result}");
        let ns_pos = result.find("xmlns:foo").unwrap();
        let lang_pos = result.find("xml:lang").unwrap();
        assert!(
            ns_pos < lang_pos,
            "ns decls should come before regular attrs"
        );
    }
}